ext4: Replace lock/unlock_super() with an explicit lock for the orphan list
[safe/jmp/linux-2.6] / fs / ext4 / super.c
1 /*
2  *  linux/fs/ext4/super.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/inode.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  */
18
19 #include <linux/module.h>
20 #include <linux/string.h>
21 #include <linux/fs.h>
22 #include <linux/time.h>
23 #include <linux/vmalloc.h>
24 #include <linux/jbd2.h>
25 #include <linux/slab.h>
26 #include <linux/init.h>
27 #include <linux/blkdev.h>
28 #include <linux/parser.h>
29 #include <linux/smp_lock.h>
30 #include <linux/buffer_head.h>
31 #include <linux/exportfs.h>
32 #include <linux/vfs.h>
33 #include <linux/random.h>
34 #include <linux/mount.h>
35 #include <linux/namei.h>
36 #include <linux/quotaops.h>
37 #include <linux/seq_file.h>
38 #include <linux/proc_fs.h>
39 #include <linux/ctype.h>
40 #include <linux/marker.h>
41 #include <linux/log2.h>
42 #include <linux/crc16.h>
43 #include <asm/uaccess.h>
44
45 #include "ext4.h"
46 #include "ext4_jbd2.h"
47 #include "xattr.h"
48 #include "acl.h"
49 #include "namei.h"
50 #include "group.h"
51
/* procfs directory; ext4_put_super() removes the per-sb entry (sb->s_id) from it. */
struct proc_dir_entry *ext4_proc_root;
/* NOTE(review): not referenced in the portion of the file reviewed here. */
static struct kset *ext4_kset;

/*
 * Prototypes for static functions that are referenced before their
 * definitions appear.
 */

/* Journal handling. */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);

/* Superblock write-out, sync and freeze. */
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_write_super(struct super_block *sb);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_freeze(struct super_block *sb);
static int ext4_unfreeze(struct super_block *sb);

/* Remaining helpers. */
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);
70
71
72 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
73                                struct ext4_group_desc *bg)
74 {
75         return le32_to_cpu(bg->bg_block_bitmap_lo) |
76                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
77                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
78 }
79
80 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
81                                struct ext4_group_desc *bg)
82 {
83         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
84                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
85                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
86 }
87
88 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
89                               struct ext4_group_desc *bg)
90 {
91         return le32_to_cpu(bg->bg_inode_table_lo) |
92                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
93                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
94 }
95
96 __u32 ext4_free_blks_count(struct super_block *sb,
97                               struct ext4_group_desc *bg)
98 {
99         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
100                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
101                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
102 }
103
104 __u32 ext4_free_inodes_count(struct super_block *sb,
105                               struct ext4_group_desc *bg)
106 {
107         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
108                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
109                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
110 }
111
112 __u32 ext4_used_dirs_count(struct super_block *sb,
113                               struct ext4_group_desc *bg)
114 {
115         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
116                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
117                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
118 }
119
120 __u32 ext4_itable_unused_count(struct super_block *sb,
121                               struct ext4_group_desc *bg)
122 {
123         return le16_to_cpu(bg->bg_itable_unused_lo) |
124                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
125                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
126 }
127
128 void ext4_block_bitmap_set(struct super_block *sb,
129                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
130 {
131         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
132         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
133                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
134 }
135
136 void ext4_inode_bitmap_set(struct super_block *sb,
137                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
138 {
139         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
140         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
141                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
142 }
143
144 void ext4_inode_table_set(struct super_block *sb,
145                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
146 {
147         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
148         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
149                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
150 }
151
152 void ext4_free_blks_set(struct super_block *sb,
153                           struct ext4_group_desc *bg, __u32 count)
154 {
155         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
156         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
157                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
158 }
159
160 void ext4_free_inodes_set(struct super_block *sb,
161                           struct ext4_group_desc *bg, __u32 count)
162 {
163         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
164         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
165                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
166 }
167
168 void ext4_used_dirs_set(struct super_block *sb,
169                           struct ext4_group_desc *bg, __u32 count)
170 {
171         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
172         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
173                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
174 }
175
176 void ext4_itable_unused_set(struct super_block *sb,
177                           struct ext4_group_desc *bg, __u32 count)
178 {
179         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
180         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
181                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
182 }
183
184 /*
185  * Wrappers for jbd2_journal_start/end.
186  *
187  * The only special thing we need to do here is to make sure that all
188  * journal_end calls result in the superblock being marked dirty, so
189  * that sync() will call the filesystem's write_super callback if
190  * appropriate.
191  */
192 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
193 {
194         journal_t *journal;
195
196         if (sb->s_flags & MS_RDONLY)
197                 return ERR_PTR(-EROFS);
198
199         /* Special case here: if the journal has aborted behind our
200          * backs (eg. EIO in the commit thread), then we still need to
201          * take the FS itself readonly cleanly. */
202         journal = EXT4_SB(sb)->s_journal;
203         if (journal) {
204                 if (is_journal_aborted(journal)) {
205                         ext4_abort(sb, __func__,
206                                    "Detected aborted journal");
207                         return ERR_PTR(-EROFS);
208                 }
209                 return jbd2_journal_start(journal, nblocks);
210         }
211         /*
212          * We're not journaling, return the appropriate indication.
213          */
214         current->journal_info = EXT4_NOJOURNAL_HANDLE;
215         return current->journal_info;
216 }
217
218 /*
219  * The only special thing we need to do here is to make sure that all
220  * jbd2_journal_stop calls result in the superblock being marked dirty, so
221  * that sync() will call the filesystem's write_super callback if
222  * appropriate.
223  */
224 int __ext4_journal_stop(const char *where, handle_t *handle)
225 {
226         struct super_block *sb;
227         int err;
228         int rc;
229
230         if (!ext4_handle_valid(handle)) {
231                 /*
232                  * Do this here since we don't call jbd2_journal_stop() in
233                  * no-journal mode.
234                  */
235                 current->journal_info = NULL;
236                 return 0;
237         }
238         sb = handle->h_transaction->t_journal->j_private;
239         err = handle->h_err;
240         rc = jbd2_journal_stop(handle);
241
242         if (!err)
243                 err = rc;
244         if (err)
245                 __ext4_std_error(sb, where, err);
246         return err;
247 }
248
249 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
250                 struct buffer_head *bh, handle_t *handle, int err)
251 {
252         char nbuf[16];
253         const char *errstr = ext4_decode_error(NULL, err, nbuf);
254
255         BUG_ON(!ext4_handle_valid(handle));
256
257         if (bh)
258                 BUFFER_TRACE(bh, "abort");
259
260         if (!handle->h_err)
261                 handle->h_err = err;
262
263         if (is_handle_aborted(handle))
264                 return;
265
266         printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
267                caller, errstr, err_fn);
268
269         jbd2_journal_abort_handle(handle);
270 }
271
272 /* Deal with the reporting of failure conditions on a filesystem such as
273  * inconsistencies detected or read IO failures.
274  *
275  * On ext2, we can store the error state of the filesystem in the
276  * superblock.  That is not possible on ext4, because we may have other
277  * write ordering constraints on the superblock which prevent us from
278  * writing it out straight away; and given that the journal is about to
279  * be aborted, we can't rely on the current, or future, transactions to
280  * write out the superblock safely.
281  *
282  * We'll just use the jbd2_journal_abort() error code to record an error in
283  * the journal instead.  On recovery, the journal will compain about
284  * that error until we've noted it down and cleared it.
285  */
286
287 static void ext4_handle_error(struct super_block *sb)
288 {
289         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
290
291         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
292         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
293
294         if (sb->s_flags & MS_RDONLY)
295                 return;
296
297         if (!test_opt(sb, ERRORS_CONT)) {
298                 journal_t *journal = EXT4_SB(sb)->s_journal;
299
300                 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
301                 if (journal)
302                         jbd2_journal_abort(journal, -EIO);
303         }
304         if (test_opt(sb, ERRORS_RO)) {
305                 printk(KERN_CRIT "Remounting filesystem read-only\n");
306                 sb->s_flags |= MS_RDONLY;
307         }
308         ext4_commit_super(sb, 1);
309         if (test_opt(sb, ERRORS_PANIC))
310                 panic("EXT4-fs (device %s): panic forced after error\n",
311                         sb->s_id);
312 }
313
314 void ext4_error(struct super_block *sb, const char *function,
315                 const char *fmt, ...)
316 {
317         va_list args;
318
319         va_start(args, fmt);
320         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
321         vprintk(fmt, args);
322         printk("\n");
323         va_end(args);
324
325         ext4_handle_error(sb);
326 }
327
328 static const char *ext4_decode_error(struct super_block *sb, int errno,
329                                      char nbuf[16])
330 {
331         char *errstr = NULL;
332
333         switch (errno) {
334         case -EIO:
335                 errstr = "IO failure";
336                 break;
337         case -ENOMEM:
338                 errstr = "Out of memory";
339                 break;
340         case -EROFS:
341                 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
342                         errstr = "Journal has aborted";
343                 else
344                         errstr = "Readonly filesystem";
345                 break;
346         default:
347                 /* If the caller passed in an extra buffer for unknown
348                  * errors, textualise them now.  Else we just return
349                  * NULL. */
350                 if (nbuf) {
351                         /* Check for truncated error codes... */
352                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
353                                 errstr = nbuf;
354                 }
355                 break;
356         }
357
358         return errstr;
359 }
360
361 /* __ext4_std_error decodes expected errors from journaling functions
362  * automatically and invokes the appropriate error response.  */
363
364 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
365 {
366         char nbuf[16];
367         const char *errstr;
368
369         /* Special case: if the error is EROFS, and we're not already
370          * inside a transaction, then there's really no point in logging
371          * an error. */
372         if (errno == -EROFS && journal_current_handle() == NULL &&
373             (sb->s_flags & MS_RDONLY))
374                 return;
375
376         errstr = ext4_decode_error(sb, errno, nbuf);
377         printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
378                sb->s_id, function, errstr);
379
380         ext4_handle_error(sb);
381 }
382
383 /*
384  * ext4_abort is a much stronger failure handler than ext4_error.  The
385  * abort function may be used to deal with unrecoverable failures such
386  * as journal IO errors or ENOMEM at a critical moment in log management.
387  *
388  * We unconditionally force the filesystem into an ABORT|READONLY state,
389  * unless the error response on the fs has been set to panic in which
390  * case we take the easy way out and panic immediately.
391  */
392
393 void ext4_abort(struct super_block *sb, const char *function,
394                 const char *fmt, ...)
395 {
396         va_list args;
397
398         printk(KERN_CRIT "ext4_abort called.\n");
399
400         va_start(args, fmt);
401         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
402         vprintk(fmt, args);
403         printk("\n");
404         va_end(args);
405
406         if (test_opt(sb, ERRORS_PANIC))
407                 panic("EXT4-fs panic from previous error\n");
408
409         if (sb->s_flags & MS_RDONLY)
410                 return;
411
412         printk(KERN_CRIT "Remounting filesystem read-only\n");
413         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
414         sb->s_flags |= MS_RDONLY;
415         EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
416         if (EXT4_SB(sb)->s_journal)
417                 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
418 }
419
420 void ext4_warning(struct super_block *sb, const char *function,
421                   const char *fmt, ...)
422 {
423         va_list args;
424
425         va_start(args, fmt);
426         printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
427                sb->s_id, function);
428         vprintk(fmt, args);
429         printk("\n");
430         va_end(args);
431 }
432
433 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
434                                 const char *function, const char *fmt, ...)
435 __releases(bitlock)
436 __acquires(bitlock)
437 {
438         va_list args;
439         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
440
441         va_start(args, fmt);
442         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
443         vprintk(fmt, args);
444         printk("\n");
445         va_end(args);
446
447         if (test_opt(sb, ERRORS_CONT)) {
448                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
449                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
450                 ext4_commit_super(sb, 0);
451                 return;
452         }
453         ext4_unlock_group(sb, grp);
454         ext4_handle_error(sb);
455         /*
456          * We only get here in the ERRORS_RO case; relocking the group
457          * may be dangerous, but nothing bad will happen since the
458          * filesystem will have already been marked read/only and the
459          * journal has been aborted.  We return 1 as a hint to callers
460          * who might what to use the return value from
461          * ext4_grp_locked_error() to distinguish beween the
462          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
463          * aggressively from the ext4 function in question, with a
464          * more appropriate error code.
465          */
466         ext4_lock_group(sb, grp);
467         return;
468 }
469
470
471 void ext4_update_dynamic_rev(struct super_block *sb)
472 {
473         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
474
475         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
476                 return;
477
478         ext4_warning(sb, __func__,
479                      "updating to rev %d because of new feature flag, "
480                      "running e2fsck is recommended",
481                      EXT4_DYNAMIC_REV);
482
483         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
484         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
485         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
486         /* leave es->s_feature_*compat flags alone */
487         /* es->s_uuid will be set by e2fsck if empty */
488
489         /*
490          * The rest of the superblock fields should be zero, and if not it
491          * means they are likely already in use, so leave them alone.  We
492          * can leave it up to e2fsck to clean up any inconsistencies there.
493          */
494 }
495
496 /*
497  * Open the external journal device
498  */
499 static struct block_device *ext4_blkdev_get(dev_t dev)
500 {
501         struct block_device *bdev;
502         char b[BDEVNAME_SIZE];
503
504         bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
505         if (IS_ERR(bdev))
506                 goto fail;
507         return bdev;
508
509 fail:
510         printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
511                         __bdevname(dev, b), PTR_ERR(bdev));
512         return NULL;
513 }
514
515 /*
516  * Release the journal device
517  */
518 static int ext4_blkdev_put(struct block_device *bdev)
519 {
520         bd_release(bdev);
521         return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
522 }
523
524 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
525 {
526         struct block_device *bdev;
527         int ret = -ENODEV;
528
529         bdev = sbi->journal_bdev;
530         if (bdev) {
531                 ret = ext4_blkdev_put(bdev);
532                 sbi->journal_bdev = NULL;
533         }
534         return ret;
535 }
536
537 static inline struct inode *orphan_list_entry(struct list_head *l)
538 {
539         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
540 }
541
542 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
543 {
544         struct list_head *l;
545
546         printk(KERN_ERR "sb orphan head is %d\n",
547                le32_to_cpu(sbi->s_es->s_last_orphan));
548
549         printk(KERN_ERR "sb_info orphan list:\n");
550         list_for_each(l, &sbi->s_orphan) {
551                 struct inode *inode = orphan_list_entry(l);
552                 printk(KERN_ERR "  "
553                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
554                        inode->i_sb->s_id, inode->i_ino, inode,
555                        inode->i_mode, inode->i_nlink,
556                        NEXT_ORPHAN(inode));
557         }
558 }
559
560 static void ext4_put_super(struct super_block *sb)
561 {
562         struct ext4_sb_info *sbi = EXT4_SB(sb);
563         struct ext4_super_block *es = sbi->s_es;
564         int i, err;
565
566         ext4_mb_release(sb);
567         ext4_ext_release(sb);
568         ext4_xattr_put_super(sb);
569         if (sbi->s_journal) {
570                 err = jbd2_journal_destroy(sbi->s_journal);
571                 sbi->s_journal = NULL;
572                 if (err < 0)
573                         ext4_abort(sb, __func__,
574                                    "Couldn't clean up the journal");
575         }
576         if (!(sb->s_flags & MS_RDONLY)) {
577                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
578                 es->s_state = cpu_to_le16(sbi->s_mount_state);
579                 ext4_commit_super(sb, 1);
580         }
581         if (sbi->s_proc) {
582                 remove_proc_entry(sb->s_id, ext4_proc_root);
583         }
584         kobject_del(&sbi->s_kobj);
585
586         for (i = 0; i < sbi->s_gdb_count; i++)
587                 brelse(sbi->s_group_desc[i]);
588         kfree(sbi->s_group_desc);
589         if (is_vmalloc_addr(sbi->s_flex_groups))
590                 vfree(sbi->s_flex_groups);
591         else
592                 kfree(sbi->s_flex_groups);
593         percpu_counter_destroy(&sbi->s_freeblocks_counter);
594         percpu_counter_destroy(&sbi->s_freeinodes_counter);
595         percpu_counter_destroy(&sbi->s_dirs_counter);
596         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
597         brelse(sbi->s_sbh);
598 #ifdef CONFIG_QUOTA
599         for (i = 0; i < MAXQUOTAS; i++)
600                 kfree(sbi->s_qf_names[i]);
601 #endif
602
603         /* Debugging code just in case the in-memory inode orphan list
604          * isn't empty.  The on-disk one can be non-empty if we've
605          * detected an error and taken the fs readonly, but the
606          * in-memory list had better be clean by this point. */
607         if (!list_empty(&sbi->s_orphan))
608                 dump_orphan_list(sb, sbi);
609         J_ASSERT(list_empty(&sbi->s_orphan));
610
611         invalidate_bdev(sb->s_bdev);
612         if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
613                 /*
614                  * Invalidate the journal device's buffers.  We don't want them
615                  * floating about in memory - the physical journal device may
616                  * hotswapped, and it breaks the `ro-after' testing code.
617                  */
618                 sync_blockdev(sbi->journal_bdev);
619                 invalidate_bdev(sbi->journal_bdev);
620                 ext4_blkdev_remove(sbi);
621         }
622         sb->s_fs_info = NULL;
623         /*
624          * Now that we are completely done shutting down the
625          * superblock, we need to actually destroy the kobject.
626          */
627         unlock_kernel();
628         unlock_super(sb);
629         kobject_put(&sbi->s_kobj);
630         wait_for_completion(&sbi->s_kobj_unregister);
631         lock_super(sb);
632         lock_kernel();
633         kfree(sbi->s_blockgroup_lock);
634         kfree(sbi);
635         return;
636 }
637
638 static struct kmem_cache *ext4_inode_cachep;
639
640 /*
641  * Called inside transaction, so use GFP_NOFS
642  */
643 static struct inode *ext4_alloc_inode(struct super_block *sb)
644 {
645         struct ext4_inode_info *ei;
646
647         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
648         if (!ei)
649                 return NULL;
650 #ifdef CONFIG_EXT4_FS_POSIX_ACL
651         ei->i_acl = EXT4_ACL_NOT_CACHED;
652         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
653 #endif
654         ei->vfs_inode.i_version = 1;
655         ei->vfs_inode.i_data.writeback_index = 0;
656         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
657         INIT_LIST_HEAD(&ei->i_prealloc_list);
658         spin_lock_init(&ei->i_prealloc_lock);
659         /*
660          * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
661          * therefore it can be null here.  Don't check it, just initialize
662          * jinode.
663          */
664         jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
665         ei->i_reserved_data_blocks = 0;
666         ei->i_reserved_meta_blocks = 0;
667         ei->i_allocated_meta_blocks = 0;
668         ei->i_delalloc_reserved_flag = 0;
669         spin_lock_init(&(ei->i_block_reservation_lock));
670         return &ei->vfs_inode;
671 }
672
673 static void ext4_destroy_inode(struct inode *inode)
674 {
675         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
676                 printk("EXT4 Inode %p: orphan list check failed!\n",
677                         EXT4_I(inode));
678                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
679                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
680                                 true);
681                 dump_stack();
682         }
683         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
684 }
685
686 static void init_once(void *foo)
687 {
688         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
689
690         INIT_LIST_HEAD(&ei->i_orphan);
691 #ifdef CONFIG_EXT4_FS_XATTR
692         init_rwsem(&ei->xattr_sem);
693 #endif
694         init_rwsem(&ei->i_data_sem);
695         inode_init_once(&ei->vfs_inode);
696 }
697
698 static int init_inodecache(void)
699 {
700         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
701                                              sizeof(struct ext4_inode_info),
702                                              0, (SLAB_RECLAIM_ACCOUNT|
703                                                 SLAB_MEM_SPREAD),
704                                              init_once);
705         if (ext4_inode_cachep == NULL)
706                 return -ENOMEM;
707         return 0;
708 }
709
710 static void destroy_inodecache(void)
711 {
712         kmem_cache_destroy(ext4_inode_cachep);
713 }
714
715 static void ext4_clear_inode(struct inode *inode)
716 {
717 #ifdef CONFIG_EXT4_FS_POSIX_ACL
718         if (EXT4_I(inode)->i_acl &&
719                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
720                 posix_acl_release(EXT4_I(inode)->i_acl);
721                 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
722         }
723         if (EXT4_I(inode)->i_default_acl &&
724                         EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
725                 posix_acl_release(EXT4_I(inode)->i_default_acl);
726                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
727         }
728 #endif
729         ext4_discard_preallocations(inode);
730         if (EXT4_JOURNAL(inode))
731                 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
732                                        &EXT4_I(inode)->jinode);
733 }
734
/*
 * Append the quota-related mount options of @sb to @seq: the journaled
 * quota format and file names, then the usrquota/grpquota flags.
 * Emits nothing when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		const char *fmt_name = "vfsv0";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt_name = "vfsold";
		seq_printf(seq, ",jqfmt=%s", fmt_name);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
		seq_puts(seq, ",usrquota");
	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
		seq_puts(seq, ",grpquota");
#endif
}
758
759 /*
760  * Show an option if
761  *  - it's set to a non-default value OR
762  *  - if the per-sb default is different from the global default
763  */
764 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
765 {
766         int def_errors;
767         unsigned long def_mount_opts;
768         struct super_block *sb = vfs->mnt_sb;
769         struct ext4_sb_info *sbi = EXT4_SB(sb);
770         struct ext4_super_block *es = sbi->s_es;
771
772         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
773         def_errors     = le16_to_cpu(es->s_errors);
774
775         if (sbi->s_sb_block != 1)
776                 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
777         if (test_opt(sb, MINIX_DF))
778                 seq_puts(seq, ",minixdf");
779         if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
780                 seq_puts(seq, ",grpid");
781         if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
782                 seq_puts(seq, ",nogrpid");
783         if (sbi->s_resuid != EXT4_DEF_RESUID ||
784             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
785                 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
786         }
787         if (sbi->s_resgid != EXT4_DEF_RESGID ||
788             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
789                 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
790         }
791         if (test_opt(sb, ERRORS_RO)) {
792                 if (def_errors == EXT4_ERRORS_PANIC ||
793                     def_errors == EXT4_ERRORS_CONTINUE) {
794                         seq_puts(seq, ",errors=remount-ro");
795                 }
796         }
797         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
798                 seq_puts(seq, ",errors=continue");
799         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
800                 seq_puts(seq, ",errors=panic");
801         if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
802                 seq_puts(seq, ",nouid32");
803         if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
804                 seq_puts(seq, ",debug");
805         if (test_opt(sb, OLDALLOC))
806                 seq_puts(seq, ",oldalloc");
807 #ifdef CONFIG_EXT4_FS_XATTR
808         if (test_opt(sb, XATTR_USER) &&
809                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
810                 seq_puts(seq, ",user_xattr");
811         if (!test_opt(sb, XATTR_USER) &&
812             (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
813                 seq_puts(seq, ",nouser_xattr");
814         }
815 #endif
816 #ifdef CONFIG_EXT4_FS_POSIX_ACL
817         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
818                 seq_puts(seq, ",acl");
819         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
820                 seq_puts(seq, ",noacl");
821 #endif
822         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
823                 seq_printf(seq, ",commit=%u",
824                            (unsigned) (sbi->s_commit_interval / HZ));
825         }
826         if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
827                 seq_printf(seq, ",min_batch_time=%u",
828                            (unsigned) sbi->s_min_batch_time);
829         }
830         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
831                 seq_printf(seq, ",max_batch_time=%u",
832                            (unsigned) sbi->s_min_batch_time);
833         }
834
835         /*
836          * We're changing the default of barrier mount option, so
837          * let's always display its mount state so it's clear what its
838          * status is.
839          */
840         seq_puts(seq, ",barrier=");
841         seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
842         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
843                 seq_puts(seq, ",journal_async_commit");
844         if (test_opt(sb, NOBH))
845                 seq_puts(seq, ",nobh");
846         if (test_opt(sb, I_VERSION))
847                 seq_puts(seq, ",i_version");
848         if (!test_opt(sb, DELALLOC))
849                 seq_puts(seq, ",nodelalloc");
850
851
852         if (sbi->s_stripe)
853                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
854         /*
855          * journal mode get enabled in different ways
856          * So just print the value even if we didn't specify it
857          */
858         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
859                 seq_puts(seq, ",data=journal");
860         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
861                 seq_puts(seq, ",data=ordered");
862         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
863                 seq_puts(seq, ",data=writeback");
864
865         if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
866                 seq_printf(seq, ",inode_readahead_blks=%u",
867                            sbi->s_inode_readahead_blks);
868
869         if (test_opt(sb, DATA_ERR_ABORT))
870                 seq_puts(seq, ",data_err=abort");
871
872         if (test_opt(sb, NO_AUTO_DA_ALLOC))
873                 seq_puts(seq, ",noauto_da_alloc");
874
875         ext4_show_quota_options(seq, sb);
876         return 0;
877 }
878
879
880 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
881                 u64 ino, u32 generation)
882 {
883         struct inode *inode;
884
885         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
886                 return ERR_PTR(-ESTALE);
887         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
888                 return ERR_PTR(-ESTALE);
889
890         /* iget isn't really right if the inode is currently unallocated!!
891          *
892          * ext4_read_inode will return a bad_inode if the inode had been
893          * deleted, so we should be safe.
894          *
895          * Currently we don't know the generation for parent directory, so
896          * a generation of 0 means "accept any"
897          */
898         inode = ext4_iget(sb, ino);
899         if (IS_ERR(inode))
900                 return ERR_CAST(inode);
901         if (generation && inode->i_generation != generation) {
902                 iput(inode);
903                 return ERR_PTR(-ESTALE);
904         }
905
906         return inode;
907 }
908
909 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
910                 int fh_len, int fh_type)
911 {
912         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
913                                     ext4_nfs_get_inode);
914 }
915
916 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
917                 int fh_len, int fh_type)
918 {
919         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
920                                     ext4_nfs_get_inode);
921 }
922
923 /*
924  * Try to release metadata pages (indirect blocks, directories) which are
925  * mapped via the block device.  Since these pages could have journal heads
926  * which would prevent try_to_free_buffers() from freeing them, we must use
927  * jbd2 layer's try_to_free_buffers() function to release them.
928  */
929 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
930 {
931         journal_t *journal = EXT4_SB(sb)->s_journal;
932
933         WARN_ON(PageChecked(page));
934         if (!page_has_buffers(page))
935                 return 0;
936         if (journal)
937                 return jbd2_journal_try_to_free_buffers(journal, page,
938                                                         wait & ~__GFP_WAIT);
939         return try_to_free_buffers(page);
940 }
941
942 #ifdef CONFIG_QUOTA
/* Name of quota type @t for use in messages: "user" or "group". */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Pick the identifier formed by pasting prefix @on onto USRJQUOTA (when
 * @t is USRQUOTA) or GRPJQUOTA (otherwise).
 *
 * @on must be pasted bare: writing (on)##USRJQUOTA would paste the
 * closing parenthesis with the identifier, which does not form a valid
 * preprocessing token and fails to compile on any expansion.
 */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA ? on##USRJQUOTA : on##GRPJQUOTA)
945
946 static int ext4_write_dquot(struct dquot *dquot);
947 static int ext4_acquire_dquot(struct dquot *dquot);
948 static int ext4_release_dquot(struct dquot *dquot);
949 static int ext4_mark_dquot_dirty(struct dquot *dquot);
950 static int ext4_write_info(struct super_block *sb, int type);
951 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
952                                 char *path, int remount);
953 static int ext4_quota_on_mount(struct super_block *sb, int type);
954 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
955                                size_t len, loff_t off);
956 static ssize_t ext4_quota_write(struct super_block *sb, int type,
957                                 const char *data, size_t len, loff_t off);
958
/*
 * Quota operations table for ext4.  The write_dquot, acquire_dquot,
 * release_dquot, mark_dirty and write_info hooks point at the ext4_*
 * functions declared just above; get_reserved_space is also an ext4_*
 * function.  The remaining hooks use dquot_* functions that are not
 * defined in this part of the file (presumably the generic quota code).
 */
959 static struct dquot_operations ext4_quota_operations = {
960         .initialize     = dquot_initialize,
961         .drop           = dquot_drop,
962         .alloc_space    = dquot_alloc_space,
963         .reserve_space  = dquot_reserve_space,
964         .claim_space    = dquot_claim_space,
965         .release_rsv    = dquot_release_reserved_space,
966         .get_reserved_space = ext4_get_reserved_space,
967         .alloc_inode    = dquot_alloc_inode,
968         .free_space     = dquot_free_space,
969         .free_inode     = dquot_free_inode,
970         .transfer       = dquot_transfer,
971         .write_dquot    = ext4_write_dquot,
972         .acquire_dquot  = ext4_acquire_dquot,
973         .release_dquot  = ext4_release_dquot,
974         .mark_dirty     = ext4_mark_dquot_dirty,
975         .write_info     = ext4_write_info,
976         .alloc_dquot    = dquot_alloc,
977         .destroy_dquot  = dquot_destroy,
978 };
979
/*
 * Quota control operations table for ext4.  Only quota_on is routed to an
 * ext4 function (ext4_quota_on, declared above); every other entry uses a
 * vfs_* function not defined in this part of the file.
 */
980 static struct quotactl_ops ext4_qctl_operations = {
981         .quota_on       = ext4_quota_on,
982         .quota_off      = vfs_quota_off,
983         .quota_sync     = vfs_quota_sync,
984         .get_info       = vfs_get_dqinfo,
985         .set_info       = vfs_set_dqinfo,
986         .get_dqblk      = vfs_get_dqblk,
987         .set_dqblk      = vfs_set_dqblk
988 };
989 #endif
990
/*
 * Superblock operations table.  Compared with ext4_nojournal_sops below,
 * this table additionally sets sync_fs, freeze_fs and unfreeze_fs, and
 * does not set write_super.
 * NOTE(review): presumably the table installed when a journal is in use;
 * confirm where s_op is assigned.
 */
991 static const struct super_operations ext4_sops = {
992         .alloc_inode    = ext4_alloc_inode,
993         .destroy_inode  = ext4_destroy_inode,
994         .write_inode    = ext4_write_inode,
995         .dirty_inode    = ext4_dirty_inode,
996         .delete_inode   = ext4_delete_inode,
997         .put_super      = ext4_put_super,
998         .sync_fs        = ext4_sync_fs,
999         .freeze_fs      = ext4_freeze,
1000         .unfreeze_fs    = ext4_unfreeze,
1001         .statfs         = ext4_statfs,
1002         .remount_fs     = ext4_remount,
1003         .clear_inode    = ext4_clear_inode,
1004         .show_options   = ext4_show_options,
/* The quota file read/write hooks exist only when quota support is built. */
1005 #ifdef CONFIG_QUOTA
1006         .quota_read     = ext4_quota_read,
1007         .quota_write    = ext4_quota_write,
1008 #endif
1009         .bdev_try_to_free_page = bdev_try_to_free_page,
1010 };
1011
/*
 * Alternative superblock operations table.  Compared with ext4_sops
 * above, it sets write_super and leaves sync_fs, freeze_fs and
 * unfreeze_fs unset; all other entries are the same.
 * NOTE(review): presumably the table installed when no journal is in
 * use; confirm where s_op is assigned.
 */
1012 static const struct super_operations ext4_nojournal_sops = {
1013         .alloc_inode    = ext4_alloc_inode,
1014         .destroy_inode  = ext4_destroy_inode,
1015         .write_inode    = ext4_write_inode,
1016         .dirty_inode    = ext4_dirty_inode,
1017         .delete_inode   = ext4_delete_inode,
1018         .write_super    = ext4_write_super,
1019         .put_super      = ext4_put_super,
1020         .statfs         = ext4_statfs,
1021         .remount_fs     = ext4_remount,
1022         .clear_inode    = ext4_clear_inode,
1023         .show_options   = ext4_show_options,
/* The quota file read/write hooks exist only when quota support is built. */
1024 #ifdef CONFIG_QUOTA
1025         .quota_read     = ext4_quota_read,
1026         .quota_write    = ext4_quota_write,
1027 #endif
1028         .bdev_try_to_free_page = bdev_try_to_free_page,
1029 };
1030
/*
 * Export operations table: file-handle lookups go through the two
 * ext4_fh_to_* wrappers defined above; get_parent is ext4_get_parent,
 * which is not defined in this part of the file.
 */
1031 static const struct export_operations ext4_export_ops = {
1032         .fh_to_dentry = ext4_fh_to_dentry,
1033         .fh_to_parent = ext4_fh_to_parent,
1034         .get_parent = ext4_get_parent,
1035 };
1036
/*
 * Identifiers for the recognised mount options.  Each value is used as
 * the first member of one or more entries in the tokens[] table below and
 * as a case label in the switch inside parse_options().  Opt_err is the
 * value paired with the NULL pattern that ends tokens[].
 */
1037 enum {
1038         Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1039         Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1040         Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
1041         Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1042         Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
1043         Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1044         Opt_journal_update, Opt_journal_dev,
1045         Opt_journal_checksum, Opt_journal_async_commit,
1046         Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1047         Opt_data_err_abort, Opt_data_err_ignore,
1048         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1049         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1050         Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1051         Opt_usrquota, Opt_grpquota, Opt_i_version,
1052         Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1053         Opt_inode_readahead_blks, Opt_journal_ioprio
1054 };
1055
/*
 * Mount option pattern table handed to match_token() by parse_options().
 * Each entry pairs an Opt_* identifier with the option text; patterns
 * containing %u or %s carry an argument that parse_options() reads back
 * through args[0].
 *
 * Some identifiers appear more than once: as aliases (grpid/bsdgroups,
 * nogrpid/sysvgroups) or in a with-argument and a bare form (barrier,
 * auto_da_alloc).  The empty-valued "usrjquota=" / "grpjquota=" patterns
 * are listed ahead of their "%s" forms.
 * NOTE(review): entry order is assumed to matter to match_token(); do not
 * reorder without checking its matching rules.
 *
 * The list ends with the {Opt_err, NULL} entry.
 */
1056 static const match_table_t tokens = {
1057         {Opt_bsd_df, "bsddf"},
1058         {Opt_minix_df, "minixdf"},
1059         {Opt_grpid, "grpid"},
1060         {Opt_grpid, "bsdgroups"},
1061         {Opt_nogrpid, "nogrpid"},
1062         {Opt_nogrpid, "sysvgroups"},
1063         {Opt_resgid, "resgid=%u"},
1064         {Opt_resuid, "resuid=%u"},
1065         {Opt_sb, "sb=%u"},
1066         {Opt_err_cont, "errors=continue"},
1067         {Opt_err_panic, "errors=panic"},
1068         {Opt_err_ro, "errors=remount-ro"},
1069         {Opt_nouid32, "nouid32"},
1070         {Opt_debug, "debug"},
1071         {Opt_oldalloc, "oldalloc"},
1072         {Opt_orlov, "orlov"},
1073         {Opt_user_xattr, "user_xattr"},
1074         {Opt_nouser_xattr, "nouser_xattr"},
1075         {Opt_acl, "acl"},
1076         {Opt_noacl, "noacl"},
1077         {Opt_noload, "noload"},
1078         {Opt_nobh, "nobh"},
1079         {Opt_bh, "bh"},
1080         {Opt_commit, "commit=%u"},
1081         {Opt_min_batch_time, "min_batch_time=%u"},
1082         {Opt_max_batch_time, "max_batch_time=%u"},
1083         {Opt_journal_update, "journal=update"},
1084         {Opt_journal_dev, "journal_dev=%u"},
1085         {Opt_journal_checksum, "journal_checksum"},
1086         {Opt_journal_async_commit, "journal_async_commit"},
1087         {Opt_abort, "abort"},
1088         {Opt_data_journal, "data=journal"},
1089         {Opt_data_ordered, "data=ordered"},
1090         {Opt_data_writeback, "data=writeback"},
1091         {Opt_data_err_abort, "data_err=abort"},
1092         {Opt_data_err_ignore, "data_err=ignore"},
1093         {Opt_offusrjquota, "usrjquota="},
1094         {Opt_usrjquota, "usrjquota=%s"},
1095         {Opt_offgrpjquota, "grpjquota="},
1096         {Opt_grpjquota, "grpjquota=%s"},
1097         {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1098         {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1099         {Opt_grpquota, "grpquota"},
1100         {Opt_noquota, "noquota"},
1101         {Opt_quota, "quota"},
1102         {Opt_usrquota, "usrquota"},
1103         {Opt_barrier, "barrier=%u"},
1104         {Opt_barrier, "barrier"},
1105         {Opt_nobarrier, "nobarrier"},
1106         {Opt_i_version, "i_version"},
1107         {Opt_stripe, "stripe=%u"},
1108         {Opt_resize, "resize"},
1109         {Opt_delalloc, "delalloc"},
1110         {Opt_nodelalloc, "nodelalloc"},
1111         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1112         {Opt_journal_ioprio, "journal_ioprio=%u"},
1113         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1114         {Opt_auto_da_alloc, "auto_da_alloc"},
1115         {Opt_noauto_da_alloc, "noauto_da_alloc"},
1116         {Opt_err, NULL},
1117 };
1118
1119 static ext4_fsblk_t get_sb_block(void **data)
1120 {
1121         ext4_fsblk_t    sb_block;
1122         char            *options = (char *) *data;
1123
1124         if (!options || strncmp(options, "sb=", 3) != 0)
1125                 return 1;       /* Default location */
1126         options += 3;
1127         /*todo: use simple_strtoll with >32bit ext4 */
1128         sb_block = simple_strtoul(options, &options, 0);
1129         if (*options && *options != ',') {
1130                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1131                        (char *) *data);
1132                 return 1;
1133         }
1134         if (*options == ',')
1135                 options++;
1136         *data = (void *) options;
1137         return sb_block;
1138 }
1139
/* Default journal I/O priority value: class IOPRIO_CLASS_BE, level 3. */
1140 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1141
1142 static int parse_options(char *options, struct super_block *sb,
1143                          unsigned long *journal_devnum,
1144                          unsigned int *journal_ioprio,
1145                          ext4_fsblk_t *n_blocks_count, int is_remount)
1146 {
1147         struct ext4_sb_info *sbi = EXT4_SB(sb);
1148         char *p;
1149         substring_t args[MAX_OPT_ARGS];
1150         int data_opt = 0;
1151         int option;
1152 #ifdef CONFIG_QUOTA
1153         int qtype, qfmt;
1154         char *qname;
1155 #endif
1156
1157         if (!options)
1158                 return 1;
1159
1160         while ((p = strsep(&options, ",")) != NULL) {
1161                 int token;
1162                 if (!*p)
1163                         continue;
1164
1165                 token = match_token(p, tokens, args);
1166                 switch (token) {
1167                 case Opt_bsd_df:
1168                         clear_opt(sbi->s_mount_opt, MINIX_DF);
1169                         break;
1170                 case Opt_minix_df:
1171                         set_opt(sbi->s_mount_opt, MINIX_DF);
1172                         break;
1173                 case Opt_grpid:
1174                         set_opt(sbi->s_mount_opt, GRPID);
1175                         break;
1176                 case Opt_nogrpid:
1177                         clear_opt(sbi->s_mount_opt, GRPID);
1178                         break;
1179                 case Opt_resuid:
1180                         if (match_int(&args[0], &option))
1181                                 return 0;
1182                         sbi->s_resuid = option;
1183                         break;
1184                 case Opt_resgid:
1185                         if (match_int(&args[0], &option))
1186                                 return 0;
1187                         sbi->s_resgid = option;
1188                         break;
1189                 case Opt_sb:
1190                         /* handled by get_sb_block() instead of here */
1191                         /* *sb_block = match_int(&args[0]); */
1192                         break;
1193                 case Opt_err_panic:
1194                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1195                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1196                         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1197                         break;
1198                 case Opt_err_ro:
1199                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1200                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1201                         set_opt(sbi->s_mount_opt, ERRORS_RO);
1202                         break;
1203                 case Opt_err_cont:
1204                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1205                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1206                         set_opt(sbi->s_mount_opt, ERRORS_CONT);
1207                         break;
1208                 case Opt_nouid32:
1209                         set_opt(sbi->s_mount_opt, NO_UID32);
1210                         break;
1211                 case Opt_debug:
1212                         set_opt(sbi->s_mount_opt, DEBUG);
1213                         break;
1214                 case Opt_oldalloc:
1215                         set_opt(sbi->s_mount_opt, OLDALLOC);
1216                         break;
1217                 case Opt_orlov:
1218                         clear_opt(sbi->s_mount_opt, OLDALLOC);
1219                         break;
1220 #ifdef CONFIG_EXT4_FS_XATTR
1221                 case Opt_user_xattr:
1222                         set_opt(sbi->s_mount_opt, XATTR_USER);
1223                         break;
1224                 case Opt_nouser_xattr:
1225                         clear_opt(sbi->s_mount_opt, XATTR_USER);
1226                         break;
1227 #else
1228                 case Opt_user_xattr:
1229                 case Opt_nouser_xattr:
1230                         printk(KERN_ERR "EXT4 (no)user_xattr options "
1231                                "not supported\n");
1232                         break;
1233 #endif
1234 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1235                 case Opt_acl:
1236                         set_opt(sbi->s_mount_opt, POSIX_ACL);
1237                         break;
1238                 case Opt_noacl:
1239                         clear_opt(sbi->s_mount_opt, POSIX_ACL);
1240                         break;
1241 #else
1242                 case Opt_acl:
1243                 case Opt_noacl:
1244                         printk(KERN_ERR "EXT4 (no)acl options "
1245                                "not supported\n");
1246                         break;
1247 #endif
1248                 case Opt_journal_update:
1249                         /* @@@ FIXME */
1250                         /* Eventually we will want to be able to create
1251                            a journal file here.  For now, only allow the
1252                            user to specify an existing inode to be the
1253                            journal file. */
1254                         if (is_remount) {
1255                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1256                                        "journal on remount\n");
1257                                 return 0;
1258                         }
1259                         set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1260                         break;
1261                 case Opt_journal_dev:
1262                         if (is_remount) {
1263                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1264                                        "journal on remount\n");
1265                                 return 0;
1266                         }
1267                         if (match_int(&args[0], &option))
1268                                 return 0;
1269                         *journal_devnum = option;
1270                         break;
1271                 case Opt_journal_checksum:
1272                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1273                         break;
1274                 case Opt_journal_async_commit:
1275                         set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1276                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1277                         break;
1278                 case Opt_noload:
1279                         set_opt(sbi->s_mount_opt, NOLOAD);
1280                         break;
1281                 case Opt_commit:
1282                         if (match_int(&args[0], &option))
1283                                 return 0;
1284                         if (option < 0)
1285                                 return 0;
1286                         if (option == 0)
1287                                 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1288                         sbi->s_commit_interval = HZ * option;
1289                         break;
1290                 case Opt_max_batch_time:
1291                         if (match_int(&args[0], &option))
1292                                 return 0;
1293                         if (option < 0)
1294                                 return 0;
1295                         if (option == 0)
1296                                 option = EXT4_DEF_MAX_BATCH_TIME;
1297                         sbi->s_max_batch_time = option;
1298                         break;
1299                 case Opt_min_batch_time:
1300                         if (match_int(&args[0], &option))
1301                                 return 0;
1302                         if (option < 0)
1303                                 return 0;
1304                         sbi->s_min_batch_time = option;
1305                         break;
1306                 case Opt_data_journal:
1307                         data_opt = EXT4_MOUNT_JOURNAL_DATA;
1308                         goto datacheck;
1309                 case Opt_data_ordered:
1310                         data_opt = EXT4_MOUNT_ORDERED_DATA;
1311                         goto datacheck;
1312                 case Opt_data_writeback:
1313                         data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1314                 datacheck:
1315                         if (is_remount) {
1316                                 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1317                                                 != data_opt) {
1318                                         printk(KERN_ERR
1319                                                 "EXT4-fs: cannot change data "
1320                                                 "mode on remount\n");
1321                                         return 0;
1322                                 }
1323                         } else {
1324                                 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1325                                 sbi->s_mount_opt |= data_opt;
1326                         }
1327                         break;
1328                 case Opt_data_err_abort:
1329                         set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1330                         break;
1331                 case Opt_data_err_ignore:
1332                         clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1333                         break;
1334 #ifdef CONFIG_QUOTA
1335                 case Opt_usrjquota:
1336                         qtype = USRQUOTA;
1337                         goto set_qf_name;
1338                 case Opt_grpjquota:
1339                         qtype = GRPQUOTA;
1340 set_qf_name:
1341                         if (sb_any_quota_loaded(sb) &&
1342                             !sbi->s_qf_names[qtype]) {
1343                                 printk(KERN_ERR
1344                                        "EXT4-fs: Cannot change journaled "
1345                                        "quota options when quota turned on.\n");
1346                                 return 0;
1347                         }
1348                         qname = match_strdup(&args[0]);
1349                         if (!qname) {
1350                                 printk(KERN_ERR
1351                                         "EXT4-fs: not enough memory for "
1352                                         "storing quotafile name.\n");
1353                                 return 0;
1354                         }
1355                         if (sbi->s_qf_names[qtype] &&
1356                             strcmp(sbi->s_qf_names[qtype], qname)) {
1357                                 printk(KERN_ERR
1358                                         "EXT4-fs: %s quota file already "
1359                                         "specified.\n", QTYPE2NAME(qtype));
1360                                 kfree(qname);
1361                                 return 0;
1362                         }
1363                         sbi->s_qf_names[qtype] = qname;
1364                         if (strchr(sbi->s_qf_names[qtype], '/')) {
1365                                 printk(KERN_ERR
1366                                         "EXT4-fs: quotafile must be on "
1367                                         "filesystem root.\n");
1368                                 kfree(sbi->s_qf_names[qtype]);
1369                                 sbi->s_qf_names[qtype] = NULL;
1370                                 return 0;
1371                         }
1372                         set_opt(sbi->s_mount_opt, QUOTA);
1373                         break;
1374                 case Opt_offusrjquota:
1375                         qtype = USRQUOTA;
1376                         goto clear_qf_name;
1377                 case Opt_offgrpjquota:
1378                         qtype = GRPQUOTA;
1379 clear_qf_name:
1380                         if (sb_any_quota_loaded(sb) &&
1381                             sbi->s_qf_names[qtype]) {
1382                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1383                                         "journaled quota options when "
1384                                         "quota turned on.\n");
1385                                 return 0;
1386                         }
1387                         /*
1388                          * The space will be released later when all options
1389                          * are confirmed to be correct
1390                          */
1391                         sbi->s_qf_names[qtype] = NULL;
1392                         break;
1393                 case Opt_jqfmt_vfsold:
1394                         qfmt = QFMT_VFS_OLD;
1395                         goto set_qf_format;
1396                 case Opt_jqfmt_vfsv0:
1397                         qfmt = QFMT_VFS_V0;
1398 set_qf_format:
1399                         if (sb_any_quota_loaded(sb) &&
1400                             sbi->s_jquota_fmt != qfmt) {
1401                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1402                                         "journaled quota options when "
1403                                         "quota turned on.\n");
1404                                 return 0;
1405                         }
1406                         sbi->s_jquota_fmt = qfmt;
1407                         break;
1408                 case Opt_quota:
1409                 case Opt_usrquota:
1410                         set_opt(sbi->s_mount_opt, QUOTA);
1411                         set_opt(sbi->s_mount_opt, USRQUOTA);
1412                         break;
1413                 case Opt_grpquota:
1414                         set_opt(sbi->s_mount_opt, QUOTA);
1415                         set_opt(sbi->s_mount_opt, GRPQUOTA);
1416                         break;
1417                 case Opt_noquota:
1418                         if (sb_any_quota_loaded(sb)) {
1419                                 printk(KERN_ERR "EXT4-fs: Cannot change quota "
1420                                         "options when quota turned on.\n");
1421                                 return 0;
1422                         }
1423                         clear_opt(sbi->s_mount_opt, QUOTA);
1424                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1425                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1426                         break;
1427 #else
1428                 case Opt_quota:
1429                 case Opt_usrquota:
1430                 case Opt_grpquota:
1431                         printk(KERN_ERR
1432                                 "EXT4-fs: quota options not supported.\n");
1433                         break;
1434                 case Opt_usrjquota:
1435                 case Opt_grpjquota:
1436                 case Opt_offusrjquota:
1437                 case Opt_offgrpjquota:
1438                 case Opt_jqfmt_vfsold:
1439                 case Opt_jqfmt_vfsv0:
1440                         printk(KERN_ERR
1441                                 "EXT4-fs: journaled quota options not "
1442                                 "supported.\n");
1443                         break;
1444                 case Opt_noquota:
1445                         break;
1446 #endif
1447                 case Opt_abort:
1448                         set_opt(sbi->s_mount_opt, ABORT);
1449                         break;
1450                 case Opt_nobarrier:
1451                         clear_opt(sbi->s_mount_opt, BARRIER);
1452                         break;
1453                 case Opt_barrier:
1454                         if (match_int(&args[0], &option)) {
1455                                 set_opt(sbi->s_mount_opt, BARRIER);
1456                                 break;
1457                         }
1458                         if (option)
1459                                 set_opt(sbi->s_mount_opt, BARRIER);
1460                         else
1461                                 clear_opt(sbi->s_mount_opt, BARRIER);
1462                         break;
1463                 case Opt_ignore:
1464                         break;
1465                 case Opt_resize:
1466                         if (!is_remount) {
1467                                 printk("EXT4-fs: resize option only available "
1468                                         "for remount\n");
1469                                 return 0;
1470                         }
1471                         if (match_int(&args[0], &option) != 0)
1472                                 return 0;
1473                         *n_blocks_count = option;
1474                         break;
1475                 case Opt_nobh:
1476                         set_opt(sbi->s_mount_opt, NOBH);
1477                         break;
1478                 case Opt_bh:
1479                         clear_opt(sbi->s_mount_opt, NOBH);
1480                         break;
1481                 case Opt_i_version:
1482                         set_opt(sbi->s_mount_opt, I_VERSION);
1483                         sb->s_flags |= MS_I_VERSION;
1484                         break;
1485                 case Opt_nodelalloc:
1486                         clear_opt(sbi->s_mount_opt, DELALLOC);
1487                         break;
1488                 case Opt_stripe:
1489                         if (match_int(&args[0], &option))
1490                                 return 0;
1491                         if (option < 0)
1492                                 return 0;
1493                         sbi->s_stripe = option;
1494                         break;
1495                 case Opt_delalloc:
1496                         set_opt(sbi->s_mount_opt, DELALLOC);
1497                         break;
1498                 case Opt_inode_readahead_blks:
1499                         if (match_int(&args[0], &option))
1500                                 return 0;
1501                         if (option < 0 || option > (1 << 30))
1502                                 return 0;
1503                         if (!is_power_of_2(option)) {
1504                                 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1505                                        " must be a power of 2\n");
1506                                 return 0;
1507                         }
1508                         sbi->s_inode_readahead_blks = option;
1509                         break;
1510                 case Opt_journal_ioprio:
1511                         if (match_int(&args[0], &option))
1512                                 return 0;
1513                         if (option < 0 || option > 7)
1514                                 break;
1515                         *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1516                                                             option);
1517                         break;
1518                 case Opt_noauto_da_alloc:
1519                         set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1520                         break;
1521                 case Opt_auto_da_alloc:
1522                         if (match_int(&args[0], &option)) {
1523                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1524                                 break;
1525                         }
1526                         if (option)
1527                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1528                         else
1529                                 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1530                         break;
1531                 default:
1532                         printk(KERN_ERR
1533                                "EXT4-fs: Unrecognized mount option \"%s\" "
1534                                "or missing value\n", p);
1535                         return 0;
1536                 }
1537         }
1538 #ifdef CONFIG_QUOTA
1539         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1540                 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1541                      sbi->s_qf_names[USRQUOTA])
1542                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1543
1544                 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1545                      sbi->s_qf_names[GRPQUOTA])
1546                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1547
1548                 if ((sbi->s_qf_names[USRQUOTA] &&
1549                                 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1550                     (sbi->s_qf_names[GRPQUOTA] &&
1551                                 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1552                         printk(KERN_ERR "EXT4-fs: old and new quota "
1553                                         "format mixing.\n");
1554                         return 0;
1555                 }
1556
1557                 if (!sbi->s_jquota_fmt) {
1558                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1559                                         "not specified.\n");
1560                         return 0;
1561                 }
1562         } else {
1563                 if (sbi->s_jquota_fmt) {
1564                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1565                                         "specified with no journaling "
1566                                         "enabled.\n");
1567                         return 0;
1568                 }
1569         }
1570 #endif
1571         return 1;
1572 }
1573
1574 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1575                             int read_only)
1576 {
1577         struct ext4_sb_info *sbi = EXT4_SB(sb);
1578         int res = 0;
1579
1580         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1581                 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1582                        "forcing read-only mode\n");
1583                 res = MS_RDONLY;
1584         }
1585         if (read_only)
1586                 return res;
1587         if (!(sbi->s_mount_state & EXT4_VALID_FS))
1588                 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1589                        "running e2fsck is recommended\n");
1590         else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1591                 printk(KERN_WARNING
1592                        "EXT4-fs warning: mounting fs with errors, "
1593                        "running e2fsck is recommended\n");
1594         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1595                  le16_to_cpu(es->s_mnt_count) >=
1596                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1597                 printk(KERN_WARNING
1598                        "EXT4-fs warning: maximal mount count reached, "
1599                        "running e2fsck is recommended\n");
1600         else if (le32_to_cpu(es->s_checkinterval) &&
1601                 (le32_to_cpu(es->s_lastcheck) +
1602                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1603                 printk(KERN_WARNING
1604                        "EXT4-fs warning: checktime reached, "
1605                        "running e2fsck is recommended\n");
1606         if (!sbi->s_journal) 
1607                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1608         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1609                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1610         le16_add_cpu(&es->s_mnt_count, 1);
1611         es->s_mtime = cpu_to_le32(get_seconds());
1612         ext4_update_dynamic_rev(sb);
1613         if (sbi->s_journal)
1614                 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1615
1616         ext4_commit_super(sb, 1);
1617         if (test_opt(sb, DEBUG))
1618                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1619                                 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1620                         sb->s_blocksize,
1621                         sbi->s_groups_count,
1622                         EXT4_BLOCKS_PER_GROUP(sb),
1623                         EXT4_INODES_PER_GROUP(sb),
1624                         sbi->s_mount_opt);
1625
1626         if (EXT4_SB(sb)->s_journal) {
1627                 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1628                        sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1629                        "external", EXT4_SB(sb)->s_journal->j_devname);
1630         } else {
1631                 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1632         }
1633         return res;
1634 }
1635
1636 static int ext4_fill_flex_info(struct super_block *sb)
1637 {
1638         struct ext4_sb_info *sbi = EXT4_SB(sb);
1639         struct ext4_group_desc *gdp = NULL;
1640         struct buffer_head *bh;
1641         ext4_group_t flex_group_count;
1642         ext4_group_t flex_group;
1643         int groups_per_flex = 0;
1644         size_t size;
1645         int i;
1646
1647         if (!sbi->s_es->s_log_groups_per_flex) {
1648                 sbi->s_log_groups_per_flex = 0;
1649                 return 1;
1650         }
1651
1652         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1653         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1654
1655         /* We allocate both existing and potentially added groups */
1656         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1657                         ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1658                               EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1659         size = flex_group_count * sizeof(struct flex_groups);
1660         sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1661         if (sbi->s_flex_groups == NULL) {
1662                 sbi->s_flex_groups = vmalloc(size);
1663                 if (sbi->s_flex_groups)
1664                         memset(sbi->s_flex_groups, 0, size);
1665         }
1666         if (sbi->s_flex_groups == NULL) {
1667                 printk(KERN_ERR "EXT4-fs: not enough memory for "
1668                                 "%u flex groups\n", flex_group_count);
1669                 goto failed;
1670         }
1671
1672         for (i = 0; i < sbi->s_groups_count; i++) {
1673                 gdp = ext4_get_group_desc(sb, i, &bh);
1674
1675                 flex_group = ext4_flex_group(sbi, i);
1676                 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1677                            ext4_free_inodes_count(sb, gdp));
1678                 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1679                            ext4_free_blks_count(sb, gdp));
1680                 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1681                            ext4_used_dirs_count(sb, gdp));
1682         }
1683
1684         return 1;
1685 failed:
1686         return 0;
1687 }
1688
1689 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1690                             struct ext4_group_desc *gdp)
1691 {
1692         __u16 crc = 0;
1693
1694         if (sbi->s_es->s_feature_ro_compat &
1695             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1696                 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1697                 __le32 le_group = cpu_to_le32(block_group);
1698
1699                 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1700                 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1701                 crc = crc16(crc, (__u8 *)gdp, offset);
1702                 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1703                 /* for checksum of struct ext4_group_desc do the rest...*/
1704                 if ((sbi->s_es->s_feature_incompat &
1705                      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1706                     offset < le16_to_cpu(sbi->s_es->s_desc_size))
1707                         crc = crc16(crc, (__u8 *)gdp + offset,
1708                                     le16_to_cpu(sbi->s_es->s_desc_size) -
1709                                         offset);
1710         }
1711
1712         return cpu_to_le16(crc);
1713 }
1714
1715 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1716                                 struct ext4_group_desc *gdp)
1717 {
1718         if ((sbi->s_es->s_feature_ro_compat &
1719              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1720             (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1721                 return 0;
1722
1723         return 1;
1724 }
1725
1726 /* Called at mount-time, super-block is locked */
1727 static int ext4_check_descriptors(struct super_block *sb)
1728 {
1729         struct ext4_sb_info *sbi = EXT4_SB(sb);
1730         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1731         ext4_fsblk_t last_block;
1732         ext4_fsblk_t block_bitmap;
1733         ext4_fsblk_t inode_bitmap;
1734         ext4_fsblk_t inode_table;
1735         int flexbg_flag = 0;
1736         ext4_group_t i;
1737
1738         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1739                 flexbg_flag = 1;
1740
1741         ext4_debug("Checking group descriptors");
1742
1743         for (i = 0; i < sbi->s_groups_count; i++) {
1744                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1745
1746                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1747                         last_block = ext4_blocks_count(sbi->s_es) - 1;
1748                 else
1749                         last_block = first_block +
1750                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1751
1752                 block_bitmap = ext4_block_bitmap(sb, gdp);
1753                 if (block_bitmap < first_block || block_bitmap > last_block) {
1754                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1755                                "Block bitmap for group %u not in group "
1756                                "(block %llu)!\n", i, block_bitmap);
1757                         return 0;
1758                 }
1759                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1760                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1761                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1762                                "Inode bitmap for group %u not in group "
1763                                "(block %llu)!\n", i, inode_bitmap);
1764                         return 0;
1765                 }
1766                 inode_table = ext4_inode_table(sb, gdp);
1767                 if (inode_table < first_block ||
1768                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
1769                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1770                                "Inode table for group %u not in group "
1771                                "(block %llu)!\n", i, inode_table);
1772                         return 0;
1773                 }
1774                 spin_lock(sb_bgl_lock(sbi, i));
1775                 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1776                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1777                                "Checksum for group %u failed (%u!=%u)\n",
1778                                i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1779                                gdp)), le16_to_cpu(gdp->bg_checksum));
1780                         if (!(sb->s_flags & MS_RDONLY)) {
1781                                 spin_unlock(sb_bgl_lock(sbi, i));
1782                                 return 0;
1783                         }
1784                 }
1785                 spin_unlock(sb_bgl_lock(sbi, i));
1786                 if (!flexbg_flag)
1787                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
1788         }
1789
1790         ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1791         sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1792         return 1;
1793 }
1794
1795 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1796  * the superblock) which were deleted from all directories, but held open by
1797  * a process at the time of a crash.  We walk the list and try to delete these
1798  * inodes at recovery time (only with a read-write filesystem).
1799  *
1800  * In order to keep the orphan inode chain consistent during traversal (in
1801  * case of crash during recovery), we link each inode into the superblock
1802  * orphan list_head and handle it the same way as an inode deletion during
1803  * normal operation (which journals the operations for us).
1804  *
1805  * We only do an iget() and an iput() on each inode, which is very safe if we
1806  * accidentally point at an in-use or already deleted inode.  The worst that
1807  * can happen in this case is that we get a "bit already cleared" message from
1808  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1809  * e2fsck was run on this filesystem, and it must have already done the orphan
1810  * inode cleanup for us, so we can safely abort without any further action.
1811  */
1812 static void ext4_orphan_cleanup(struct super_block *sb,
1813                                 struct ext4_super_block *es)
1814 {
1815         unsigned int s_flags = sb->s_flags;
1816         int nr_orphans = 0, nr_truncates = 0;
1817 #ifdef CONFIG_QUOTA
1818         int i;
1819 #endif
1820         if (!es->s_last_orphan) {
1821                 jbd_debug(4, "no orphan inodes to clean up\n");
1822                 return;
1823         }
1824
1825         if (bdev_read_only(sb->s_bdev)) {
1826                 printk(KERN_ERR "EXT4-fs: write access "
1827                         "unavailable, skipping orphan cleanup.\n");
1828                 return;
1829         }
1830
1831         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1832                 if (es->s_last_orphan)
1833                         jbd_debug(1, "Errors on filesystem, "
1834                                   "clearing orphan list.\n");
1835                 es->s_last_orphan = 0;
1836                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1837                 return;
1838         }
1839
1840         if (s_flags & MS_RDONLY) {
1841                 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1842                        sb->s_id);
1843                 sb->s_flags &= ~MS_RDONLY;
1844         }
1845 #ifdef CONFIG_QUOTA
1846         /* Needed for iput() to work correctly and not trash data */
1847         sb->s_flags |= MS_ACTIVE;
1848         /* Turn on quotas so that they are updated correctly */
1849         for (i = 0; i < MAXQUOTAS; i++) {
1850                 if (EXT4_SB(sb)->s_qf_names[i]) {
1851                         int ret = ext4_quota_on_mount(sb, i);
1852                         if (ret < 0)
1853                                 printk(KERN_ERR
1854                                         "EXT4-fs: Cannot turn on journaled "
1855                                         "quota: error %d\n", ret);
1856                 }
1857         }
1858 #endif
1859
1860         while (es->s_last_orphan) {
1861                 struct inode *inode;
1862
1863                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1864                 if (IS_ERR(inode)) {
1865                         es->s_last_orphan = 0;
1866                         break;
1867                 }
1868
1869                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1870                 vfs_dq_init(inode);
1871                 if (inode->i_nlink) {
1872                         printk(KERN_DEBUG
1873                                 "%s: truncating inode %lu to %lld bytes\n",
1874                                 __func__, inode->i_ino, inode->i_size);
1875                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1876                                   inode->i_ino, inode->i_size);
1877                         ext4_truncate(inode);
1878                         nr_truncates++;
1879                 } else {
1880                         printk(KERN_DEBUG
1881                                 "%s: deleting unreferenced inode %lu\n",
1882                                 __func__, inode->i_ino);
1883                         jbd_debug(2, "deleting unreferenced inode %lu\n",
1884                                   inode->i_ino);
1885                         nr_orphans++;
1886                 }
1887                 iput(inode);  /* The delete magic happens here! */
1888         }
1889
1890 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1891
1892         if (nr_orphans)
1893                 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1894                        sb->s_id, PLURAL(nr_orphans));
1895         if (nr_truncates)
1896                 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1897                        sb->s_id, PLURAL(nr_truncates));
1898 #ifdef CONFIG_QUOTA
1899         /* Turn quotas off */
1900         for (i = 0; i < MAXQUOTAS; i++) {
1901                 if (sb_dqopt(sb)->files[i])
1902                         vfs_quota_off(sb, i, 0);
1903         }
1904 #endif
1905         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1906 }
1907 /*
1908  * Maximal extent format file size.
1909  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1910  * extent format containers, within a sector_t, and within i_blocks
1911  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1912  * so that won't be a limiting factor.
1913  *
1914  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1915  */
1916 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1917 {
1918         loff_t res;
1919         loff_t upper_limit = MAX_LFS_FILESIZE;
1920
1921         /* small i_blocks in vfs inode? */
1922         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1923                 /*
1924                  * CONFIG_LBD is not enabled implies the inode
1925                  * i_block represent total blocks in 512 bytes
1926                  * 32 == size of vfs inode i_blocks * 8
1927                  */
1928                 upper_limit = (1LL << 32) - 1;
1929
1930                 /* total blocks in file system block size */
1931                 upper_limit >>= (blkbits - 9);
1932                 upper_limit <<= blkbits;
1933         }
1934
1935         /* 32-bit extent-start container, ee_block */
1936         res = 1LL << 32;
1937         res <<= blkbits;
1938         res -= 1;
1939
1940         /* Sanity check against vm- & vfs- imposed limits */
1941         if (res > upper_limit)
1942                 res = upper_limit;
1943
1944         return res;
1945 }
1946
1947 /*
1948  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1949  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1950  * We need to be 1 filesystem block less than the 2^48 sector limit.
1951  */
1952 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1953 {
1954         loff_t res = EXT4_NDIR_BLOCKS;
1955         int meta_blocks;
1956         loff_t upper_limit;
1957         /* This is calculated to be the largest file size for a
1958          * dense, bitmapped file such that the total number of
1959          * sectors in the file, including data and all indirect blocks,
1960          * does not exceed 2^48 -1
1961          * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1962          * total number of  512 bytes blocks of the file
1963          */
1964
1965         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1966                 /*
1967                  * !has_huge_files or CONFIG_LBD is not enabled
1968                  * implies the inode i_block represent total blocks in
1969                  * 512 bytes 32 == size of vfs inode i_blocks * 8
1970                  */
1971                 upper_limit = (1LL << 32) - 1;
1972
1973                 /* total blocks in file system block size */
1974                 upper_limit >>= (bits - 9);
1975
1976         } else {
1977                 /*
1978                  * We use 48 bit ext4_inode i_blocks
1979                  * With EXT4_HUGE_FILE_FL set the i_blocks
1980                  * represent total number of blocks in
1981                  * file system block size
1982                  */
1983                 upper_limit = (1LL << 48) - 1;
1984
1985         }
1986
1987         /* indirect blocks */
1988         meta_blocks = 1;
1989         /* double indirect blocks */
1990         meta_blocks += 1 + (1LL << (bits-2));
1991         /* tripple indirect blocks */
1992         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1993
1994         upper_limit -= meta_blocks;
1995         upper_limit <<= bits;
1996
1997         res += 1LL << (bits-2);
1998         res += 1LL << (2*(bits-2));
1999         res += 1LL << (3*(bits-2));
2000         res <<= bits;
2001         if (res > upper_limit)
2002                 res = upper_limit;
2003
2004         if (res > MAX_LFS_FILESIZE)
2005                 res = MAX_LFS_FILESIZE;
2006
2007         return res;
2008 }
2009
2010 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2011                                 ext4_fsblk_t logical_sb_block, int nr)
2012 {
2013         struct ext4_sb_info *sbi = EXT4_SB(sb);
2014         ext4_group_t bg, first_meta_bg;
2015         int has_super = 0;
2016
2017         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2018
2019         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2020             nr < first_meta_bg)
2021                 return logical_sb_block + nr + 1;
2022         bg = sbi->s_desc_per_block * nr;
2023         if (ext4_bg_has_super(sb, bg))
2024                 has_super = 1;
2025         return (has_super + ext4_group_first_block_no(sb, bg));
2026 }
2027
2028 /**
2029  * ext4_get_stripe_size: Get the stripe size.
2030  * @sbi: In memory super block info
2031  *
2032  * If we have specified it via mount option, then
2033  * use the mount option value. If the value specified at mount time is
2034  * greater than the blocks per group use the super block value.
2035  * If the super block value is greater than blocks per group return 0.
2036  * Allocator needs it be less than blocks per group.
2037  *
2038  */
2039 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2040 {
2041         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2042         unsigned long stripe_width =
2043                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2044
2045         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2046                 return sbi->s_stripe;
2047
2048         if (stripe_width <= sbi->s_blocks_per_group)
2049                 return stripe_width;
2050
2051         if (stride <= sbi->s_blocks_per_group)
2052                 return stride;
2053
2054         return 0;
2055 }
2056
/* sysfs support */
2058
2059 struct ext4_attr {
2060         struct attribute attr;
2061         ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2062         ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 
2063                          const char *, size_t);
2064         int offset;
2065 };
2066
2067 static int parse_strtoul(const char *buf,
2068                 unsigned long max, unsigned long *value)
2069 {
2070         char *endp;
2071
2072         while (*buf && isspace(*buf))
2073                 buf++;
2074         *value = simple_strtoul(buf, &endp, 0);
2075         while (*endp && isspace(*endp))
2076                 endp++;
2077         if (*endp || *value > max)
2078                 return -EINVAL;
2079
2080         return 0;
2081 }
2082
2083 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2084                                               struct ext4_sb_info *sbi,
2085                                               char *buf)
2086 {
2087         return snprintf(buf, PAGE_SIZE, "%llu\n",
2088                         (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2089 }
2090
2091 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2092                                          struct ext4_sb_info *sbi, char *buf)
2093 {
2094         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2095
2096         return snprintf(buf, PAGE_SIZE, "%lu\n",
2097                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2098                          sbi->s_sectors_written_start) >> 1);
2099 }
2100
2101 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2102                                           struct ext4_sb_info *sbi, char *buf)
2103 {
2104         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2105
2106         return snprintf(buf, PAGE_SIZE, "%llu\n",
2107                         sbi->s_kbytes_written + 
2108                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2109                           EXT4_SB(sb)->s_sectors_written_start) >> 1));
2110 }
2111
2112 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2113                                           struct ext4_sb_info *sbi,
2114                                           const char *buf, size_t count)
2115 {
2116         unsigned long t;
2117
2118         if (parse_strtoul(buf, 0x40000000, &t))
2119                 return -EINVAL;
2120
2121         if (!is_power_of_2(t))
2122                 return -EINVAL;
2123
2124         sbi->s_inode_readahead_blks = t;
2125         return count;
2126 }
2127
2128 static ssize_t sbi_ui_show(struct ext4_attr *a,
2129                                 struct ext4_sb_info *sbi, char *buf)
2130 {
2131         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2132
2133         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2134 }
2135
2136 static ssize_t sbi_ui_store(struct ext4_attr *a,
2137                             struct ext4_sb_info *sbi,
2138                             const char *buf, size_t count)
2139 {
2140         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2141         unsigned long t;
2142
2143         if (parse_strtoul(buf, 0xffffffff, &t))
2144                 return -EINVAL;
2145         *ui = t;
2146         return count;
2147 }
2148
/*
 * Helpers for declaring struct ext4_attr instances named ext4_attr_<name>.
 *
 * EXT4_ATTR_OFFSET additionally records the offset of an ext4_sb_info
 * member so the generic sbi_ui_* handlers can reach it; EXT4_ATTR builds
 * a plain attribute through __ATTR().  ATTR_LIST yields the address of
 * the embedded struct attribute for use in an attribute array.
 */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.offset = offsetof(struct ext4_sb_info, _elname),	\
}
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Read-only (0444) and read-write (0644) shorthands. */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname)	\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
#define ATTR_LIST(name) &ext4_attr_##name.attr
2164
2165 EXT4_RO_ATTR(delayed_allocation_blocks);
2166 EXT4_RO_ATTR(session_write_kbytes);
2167 EXT4_RO_ATTR(lifetime_write_kbytes);
2168 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2169                  inode_readahead_blks_store, s_inode_readahead_blks);
2170 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2171 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2172 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2173 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2174 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2175 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2176
2177 static struct attribute *ext4_attrs[] = {
2178         ATTR_LIST(delayed_allocation_blocks),
2179         ATTR_LIST(session_write_kbytes),
2180         ATTR_LIST(lifetime_write_kbytes),
2181         ATTR_LIST(inode_readahead_blks),
2182         ATTR_LIST(mb_stats),
2183         ATTR_LIST(mb_max_to_scan),
2184         ATTR_LIST(mb_min_to_scan),
2185         ATTR_LIST(mb_order2_req),
2186         ATTR_LIST(mb_stream_req),
2187         ATTR_LIST(mb_group_prealloc),
2188         NULL,
2189 };
2190
2191 static ssize_t ext4_attr_show(struct kobject *kobj,
2192                               struct attribute *attr, char *buf)
2193 {
2194         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2195                                                 s_kobj);
2196         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2197
2198         return a->show ? a->show(a, sbi, buf) : 0;
2199 }
2200
2201 static ssize_t ext4_attr_store(struct kobject *kobj,
2202                                struct attribute *attr,
2203                                const char *buf, size_t len)
2204 {
2205         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2206                                                 s_kobj);
2207         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2208
2209         return a->store ? a->store(a, sbi, buf, len) : 0;
2210 }
2211
2212 static void ext4_sb_release(struct kobject *kobj)
2213 {
2214         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2215                                                 s_kobj);
2216         complete(&sbi->s_kobj_unregister);
2217 }
2218
2219
2220 static struct sysfs_ops ext4_attr_ops = {
2221         .show   = ext4_attr_show,
2222         .store  = ext4_attr_store,
2223 };
2224
2225 static struct kobj_type ext4_ktype = {
2226         .default_attrs  = ext4_attrs,
2227         .sysfs_ops      = &ext4_attr_ops,
2228         .release        = ext4_sb_release,
2229 };
2230
2231 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2232                                 __releases(kernel_lock)
2233                                 __acquires(kernel_lock)
2234
2235 {
2236         struct buffer_head *bh;
2237         struct ext4_super_block *es = NULL;
2238         struct ext4_sb_info *sbi;
2239         ext4_fsblk_t block;
2240         ext4_fsblk_t sb_block = get_sb_block(&data);
2241         ext4_fsblk_t logical_sb_block;
2242         unsigned long offset = 0;
2243         unsigned long journal_devnum = 0;
2244         unsigned long def_mount_opts;
2245         struct inode *root;
2246         char *cp;
2247         const char *descr;
2248         int ret = -EINVAL;
2249         int blocksize;
2250         unsigned int db_count;
2251         unsigned int i;
2252         int needs_recovery, has_huge_files;
2253         int features;
2254         __u64 blocks_count;
2255         int err;
2256         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2257
2258         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2259         if (!sbi)
2260                 return -ENOMEM;
2261
2262         sbi->s_blockgroup_lock =
2263                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2264         if (!sbi->s_blockgroup_lock) {
2265                 kfree(sbi);
2266                 return -ENOMEM;
2267         }
2268         sb->s_fs_info = sbi;
2269         sbi->s_mount_opt = 0;
2270         sbi->s_resuid = EXT4_DEF_RESUID;
2271         sbi->s_resgid = EXT4_DEF_RESGID;
2272         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2273         sbi->s_sb_block = sb_block;
2274         sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2275                                                       sectors[1]);
2276
2277         unlock_kernel();
2278
2279         /* Cleanup superblock name */
2280         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2281                 *cp = '!';
2282
2283         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2284         if (!blocksize) {
2285                 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2286                 goto out_fail;
2287         }
2288
2289         /*
2290          * The ext4 superblock will not be buffer aligned for other than 1kB
2291          * block sizes.  We need to calculate the offset from buffer start.
2292          */
2293         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2294                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2295                 offset = do_div(logical_sb_block, blocksize);
2296         } else {
2297                 logical_sb_block = sb_block;
2298         }
2299
2300         if (!(bh = sb_bread(sb, logical_sb_block))) {
2301                 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2302                 goto out_fail;
2303         }
2304         /*
2305          * Note: s_es must be initialized as soon as possible because
2306          *       some ext4 macro-instructions depend on its value
2307          */
2308         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2309         sbi->s_es = es;
2310         sb->s_magic = le16_to_cpu(es->s_magic);
2311         if (sb->s_magic != EXT4_SUPER_MAGIC)
2312                 goto cantfind_ext4;
2313         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2314
2315         /* Set defaults before we parse the mount options */
2316         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2317         if (def_mount_opts & EXT4_DEFM_DEBUG)
2318                 set_opt(sbi->s_mount_opt, DEBUG);
2319         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2320                 set_opt(sbi->s_mount_opt, GRPID);
2321         if (def_mount_opts & EXT4_DEFM_UID16)
2322                 set_opt(sbi->s_mount_opt, NO_UID32);
2323 #ifdef CONFIG_EXT4_FS_XATTR
2324         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2325                 set_opt(sbi->s_mount_opt, XATTR_USER);
2326 #endif
2327 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2328         if (def_mount_opts & EXT4_DEFM_ACL)
2329                 set_opt(sbi->s_mount_opt, POSIX_ACL);
2330 #endif
2331         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2332                 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2333         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2334                 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2335         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2336                 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2337
2338         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2339                 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2340         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2341                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2342         else
2343                 set_opt(sbi->s_mount_opt, ERRORS_RO);
2344
2345         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2346         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2347         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2348         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2349         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2350
2351         set_opt(sbi->s_mount_opt, BARRIER);
2352
2353         /*
2354          * enable delayed allocation by default
2355          * Use -o nodelalloc to turn it off
2356          */
2357         set_opt(sbi->s_mount_opt, DELALLOC);
2358
2359
2360         if (!parse_options((char *) data, sb, &journal_devnum,
2361                            &journal_ioprio, NULL, 0))
2362                 goto failed_mount;
2363
2364         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2365                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2366
2367         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2368             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2369              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2370              EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2371                 printk(KERN_WARNING
2372                        "EXT4-fs warning: feature flags set on rev 0 fs, "
2373                        "running e2fsck is recommended\n");
2374
2375         /*
2376          * Check feature flags regardless of the revision level, since we
2377          * previously didn't change the revision level when setting the flags,
2378          * so there is a chance incompat flags are set on a rev 0 filesystem.
2379          */
2380         features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2381         if (features) {
2382                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2383                        "unsupported optional features (%x).\n", sb->s_id,
2384                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2385                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2386                 goto failed_mount;
2387         }
2388         features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2389         if (!(sb->s_flags & MS_RDONLY) && features) {
2390                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2391                        "unsupported optional features (%x).\n", sb->s_id,
2392                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2393                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
2394                 goto failed_mount;
2395         }
2396         has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2397                                     EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2398         if (has_huge_files) {
2399                 /*
2400                  * Large file size enabled file system can only be
2401                  * mount if kernel is build with CONFIG_LBD
2402                  */
2403                 if (sizeof(root->i_blocks) < sizeof(u64) &&
2404                                 !(sb->s_flags & MS_RDONLY)) {
2405                         printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2406                                         "files cannot be mounted read-write "
2407                                         "without CONFIG_LBD.\n", sb->s_id);
2408                         goto failed_mount;
2409                 }
2410         }
2411         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2412
2413         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2414             blocksize > EXT4_MAX_BLOCK_SIZE) {
2415                 printk(KERN_ERR
2416                        "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2417                        blocksize, sb->s_id);
2418                 goto failed_mount;
2419         }
2420
2421         if (sb->s_blocksize != blocksize) {
2422
2423                 /* Validate the filesystem blocksize */
2424                 if (!sb_set_blocksize(sb, blocksize)) {
2425                         printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2426                                         blocksize);
2427                         goto failed_mount;
2428                 }
2429
2430                 brelse(bh);
2431                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2432                 offset = do_div(logical_sb_block, blocksize);
2433                 bh = sb_bread(sb, logical_sb_block);
2434                 if (!bh) {
2435                         printk(KERN_ERR
2436                                "EXT4-fs: Can't read superblock on 2nd try.\n");
2437                         goto failed_mount;
2438                 }
2439                 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2440                 sbi->s_es = es;
2441                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2442                         printk(KERN_ERR
2443                                "EXT4-fs: Magic mismatch, very weird !\n");
2444                         goto failed_mount;
2445                 }
2446         }
2447
2448         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2449                                                       has_huge_files);
2450         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2451
2452         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2453                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2454                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2455         } else {
2456                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2457                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2458                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2459                     (!is_power_of_2(sbi->s_inode_size)) ||
2460                     (sbi->s_inode_size > blocksize)) {
2461                         printk(KERN_ERR
2462                                "EXT4-fs: unsupported inode size: %d\n",
2463                                sbi->s_inode_size);
2464                         goto failed_mount;
2465                 }
2466                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2467                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2468         }
2469         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2470         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2471                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2472                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2473                     !is_power_of_2(sbi->s_desc_size)) {
2474                         printk(KERN_ERR
2475                                "EXT4-fs: unsupported descriptor size %lu\n",
2476                                sbi->s_desc_size);
2477                         goto failed_mount;
2478                 }
2479         } else
2480                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2481         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2482         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2483         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2484                 goto cantfind_ext4;
2485         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2486         if (sbi->s_inodes_per_block == 0)
2487                 goto cantfind_ext4;
2488         sbi->s_itb_per_group = sbi->s_inodes_per_group /
2489                                         sbi->s_inodes_per_block;
2490         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2491         sbi->s_sbh = bh;
2492         sbi->s_mount_state = le16_to_cpu(es->s_state);
2493         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2494         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2495         for (i = 0; i < 4; i++)
2496                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2497         sbi->s_def_hash_version = es->s_def_hash_version;
2498         i = le32_to_cpu(es->s_flags);
2499         if (i & EXT2_FLAGS_UNSIGNED_HASH)
2500                 sbi->s_hash_unsigned = 3;
2501         else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2502 #ifdef __CHAR_UNSIGNED__
2503                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2504                 sbi->s_hash_unsigned = 3;
2505 #else
2506                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2507 #endif
2508                 sb->s_dirt = 1;
2509         }
2510
2511         if (sbi->s_blocks_per_group > blocksize * 8) {
2512                 printk(KERN_ERR
2513                        "EXT4-fs: #blocks per group too big: %lu\n",
2514                        sbi->s_blocks_per_group);
2515                 goto failed_mount;
2516         }
2517         if (sbi->s_inodes_per_group > blocksize * 8) {
2518                 printk(KERN_ERR
2519                        "EXT4-fs: #inodes per group too big: %lu\n",
2520                        sbi->s_inodes_per_group);
2521                 goto failed_mount;
2522         }
2523
2524         if (ext4_blocks_count(es) >
2525                     (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2526                 printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2527                         " too large to mount safely\n", sb->s_id);
2528                 if (sizeof(sector_t) < 8)
2529                         printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2530                                         "enabled\n");
2531                 goto failed_mount;
2532         }
2533
2534         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2535                 goto cantfind_ext4;
2536
2537         /* check blocks count against device size */
2538         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2539         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2540                 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2541                        "exceeds size of device (%llu blocks)\n",
2542                        ext4_blocks_count(es), blocks_count);
2543                 goto failed_mount;
2544         }
2545
2546         /*
2547          * It makes no sense for the first data block to be beyond the end
2548          * of the filesystem.
2549          */
2550         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2551                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2552                        "block %u is beyond end of filesystem (%llu)\n",
2553                        le32_to_cpu(es->s_first_data_block),
2554                        ext4_blocks_count(es));
2555                 goto failed_mount;
2556         }
2557         blocks_count = (ext4_blocks_count(es) -
2558                         le32_to_cpu(es->s_first_data_block) +
2559                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
2560         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2561         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2562                 printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2563                        "(block count %llu, first data block %u, "
2564                        "blocks per group %lu)\n", sbi->s_groups_count,
2565                        ext4_blocks_count(es),
2566                        le32_to_cpu(es->s_first_data_block),
2567                        EXT4_BLOCKS_PER_GROUP(sb));
2568                 goto failed_mount;
2569         }
2570         sbi->s_groups_count = blocks_count;
2571         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2572                    EXT4_DESC_PER_BLOCK(sb);
2573         sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2574                                     GFP_KERNEL);
2575         if (sbi->s_group_desc == NULL) {
2576                 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2577                 goto failed_mount;
2578         }
2579
2580 #ifdef CONFIG_PROC_FS
2581         if (ext4_proc_root)
2582                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2583 #endif
2584
2585         bgl_lock_init(sbi->s_blockgroup_lock);
2586
2587         for (i = 0; i < db_count; i++) {
2588                 block = descriptor_loc(sb, logical_sb_block, i);
2589                 sbi->s_group_desc[i] = sb_bread(sb, block);
2590                 if (!sbi->s_group_desc[i]) {
2591                         printk(KERN_ERR "EXT4-fs: "
2592                                "can't read group descriptor %d\n", i);
2593                         db_count = i;
2594                         goto failed_mount2;
2595                 }
2596         }
2597         if (!ext4_check_descriptors(sb)) {
2598                 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2599                 goto failed_mount2;
2600         }
2601         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2602                 if (!ext4_fill_flex_info(sb)) {
2603                         printk(KERN_ERR
2604                                "EXT4-fs: unable to initialize "
2605                                "flex_bg meta info!\n");
2606                         goto failed_mount2;
2607                 }
2608
2609         sbi->s_gdb_count = db_count;
2610         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2611         spin_lock_init(&sbi->s_next_gen_lock);
2612
2613         err = percpu_counter_init(&sbi->s_freeblocks_counter,
2614                         ext4_count_free_blocks(sb));
2615         if (!err) {
2616                 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2617                                 ext4_count_free_inodes(sb));
2618         }
2619         if (!err) {
2620                 err = percpu_counter_init(&sbi->s_dirs_counter,
2621                                 ext4_count_dirs(sb));
2622         }
2623         if (!err) {
2624                 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2625         }
2626         if (err) {
2627                 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2628                 goto failed_mount3;
2629         }
2630
2631         sbi->s_stripe = ext4_get_stripe_size(sbi);
2632
2633         /*
2634          * set up enough so that it can read an inode
2635          */
2636         if (!test_opt(sb, NOLOAD) &&
2637             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2638                 sb->s_op = &ext4_sops;
2639         else
2640                 sb->s_op = &ext4_nojournal_sops;
2641         sb->s_export_op = &ext4_export_ops;
2642         sb->s_xattr = ext4_xattr_handlers;
2643 #ifdef CONFIG_QUOTA
2644         sb->s_qcop = &ext4_qctl_operations;
2645         sb->dq_op = &ext4_quota_operations;
2646 #endif
2647         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2648         mutex_init(&sbi->s_orphan_lock);
2649
2650         sb->s_root = NULL;
2651
2652         needs_recovery = (es->s_last_orphan != 0 ||
2653                           EXT4_HAS_INCOMPAT_FEATURE(sb,
2654                                     EXT4_FEATURE_INCOMPAT_RECOVER));
2655
2656         /*
2657          * The first inode we look at is the journal inode.  Don't try
2658          * root first: it may be modified in the journal!
2659          */
2660         if (!test_opt(sb, NOLOAD) &&
2661             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2662                 if (ext4_load_journal(sb, es, journal_devnum))
2663                         goto failed_mount3;
2664                 if (!(sb->s_flags & MS_RDONLY) &&
2665                     EXT4_SB(sb)->s_journal->j_failed_commit) {
2666                         printk(KERN_CRIT "EXT4-fs error (device %s): "
2667                                "ext4_fill_super: Journal transaction "
2668                                "%u is corrupt\n", sb->s_id,
2669                                EXT4_SB(sb)->s_journal->j_failed_commit);
2670                         if (test_opt(sb, ERRORS_RO)) {
2671                                 printk(KERN_CRIT
2672                                        "Mounting filesystem read-only\n");
2673                                 sb->s_flags |= MS_RDONLY;
2674                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2675                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2676                         }
2677                         if (test_opt(sb, ERRORS_PANIC)) {
2678                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2679                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2680                                 ext4_commit_super(sb, 1);
2681                                 goto failed_mount4;
2682                         }
2683                 }
2684         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2685               EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2686                 printk(KERN_ERR "EXT4-fs: required journal recovery "
2687                        "suppressed and not mounted read-only\n");
2688                 goto failed_mount4;
2689         } else {
2690                 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2691                 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2692                 sbi->s_journal = NULL;
2693                 needs_recovery = 0;
2694                 goto no_journal;
2695         }
2696
2697         if (ext4_blocks_count(es) > 0xffffffffULL &&
2698             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2699                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
2700                 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2701                 goto failed_mount4;
2702         }
2703
2704         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2705                 jbd2_journal_set_features(sbi->s_journal,
2706                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2707                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2708         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2709                 jbd2_journal_set_features(sbi->s_journal,
2710                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2711                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2712                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2713         } else {
2714                 jbd2_journal_clear_features(sbi->s_journal,
2715                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2716                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2717         }
2718
2719         /* We have now updated the journal if required, so we can
2720          * validate the data journaling mode. */
2721         switch (test_opt(sb, DATA_FLAGS)) {
2722         case 0:
2723                 /* No mode set, assume a default based on the journal
2724                  * capabilities: ORDERED_DATA if the journal can
2725                  * cope, else JOURNAL_DATA
2726                  */
2727                 if (jbd2_journal_check_available_features
2728                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2729                         set_opt(sbi->s_mount_opt, ORDERED_DATA);
2730                 else
2731                         set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2732                 break;
2733
2734         case EXT4_MOUNT_ORDERED_DATA:
2735         case EXT4_MOUNT_WRITEBACK_DATA:
2736                 if (!jbd2_journal_check_available_features
2737                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2738                         printk(KERN_ERR "EXT4-fs: Journal does not support "
2739                                "requested data journaling mode\n");
2740                         goto failed_mount4;
2741                 }
2742         default:
2743                 break;
2744         }
2745         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2746
2747 no_journal:
2748
2749         if (test_opt(sb, NOBH)) {
2750                 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2751                         printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2752                                 "its supported only with writeback mode\n");
2753                         clear_opt(sbi->s_mount_opt, NOBH);
2754                 }
2755         }
2756         /*
2757          * The jbd2_journal_load will have done any necessary log recovery,
2758          * so we can safely mount the rest of the filesystem now.
2759          */
2760
2761         root = ext4_iget(sb, EXT4_ROOT_INO);
2762         if (IS_ERR(root)) {
2763                 printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2764                 ret = PTR_ERR(root);
2765                 goto failed_mount4;
2766         }
2767         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2768                 iput(root);
2769                 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2770                 goto failed_mount4;
2771         }
2772         sb->s_root = d_alloc_root(root);
2773         if (!sb->s_root) {
2774                 printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2775                 iput(root);
2776                 ret = -ENOMEM;
2777                 goto failed_mount4;
2778         }
2779
2780         ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2781
2782         /* determine the minimum size of new large inodes, if present */
2783         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2784                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2785                                                      EXT4_GOOD_OLD_INODE_SIZE;
2786                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2787                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2788                         if (sbi->s_want_extra_isize <
2789                             le16_to_cpu(es->s_want_extra_isize))
2790                                 sbi->s_want_extra_isize =
2791                                         le16_to_cpu(es->s_want_extra_isize);
2792                         if (sbi->s_want_extra_isize <
2793                             le16_to_cpu(es->s_min_extra_isize))
2794                                 sbi->s_want_extra_isize =
2795                                         le16_to_cpu(es->s_min_extra_isize);
2796                 }
2797         }
2798         /* Check if enough inode space is available */
2799         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2800                                                         sbi->s_inode_size) {
2801                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2802                                                        EXT4_GOOD_OLD_INODE_SIZE;
2803                 printk(KERN_INFO "EXT4-fs: required extra inode space not"
2804                         "available.\n");
2805         }
2806
2807         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2808                 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2809                                 "requested data journaling mode\n");
2810                 clear_opt(sbi->s_mount_opt, DELALLOC);
2811         } else if (test_opt(sb, DELALLOC))
2812                 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2813
2814         ext4_ext_init(sb);
2815         err = ext4_mb_init(sb, needs_recovery);
2816         if (err) {
2817                 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2818                        err);
2819                 goto failed_mount4;
2820         }
2821
2822         sbi->s_kobj.kset = ext4_kset;
2823         init_completion(&sbi->s_kobj_unregister);
2824         err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2825                                    "%s", sb->s_id);
2826         if (err) {
2827                 ext4_mb_release(sb);
2828                 ext4_ext_release(sb);
2829                 goto failed_mount4;
2830         };
2831
2832         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2833         ext4_orphan_cleanup(sb, es);
2834         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2835         if (needs_recovery) {
2836                 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2837                 ext4_mark_recovery_complete(sb, es);
2838         }
2839         if (EXT4_SB(sb)->s_journal) {
2840                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2841                         descr = " journalled data mode";
2842                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2843                         descr = " ordered data mode";
2844                 else
2845                         descr = " writeback data mode";
2846         } else
2847                 descr = "out journal";
2848
2849         printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2850                sb->s_id, descr);
2851
2852         lock_kernel();
2853         return 0;
2854
2855 cantfind_ext4:
2856         if (!silent)
2857                 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2858                        sb->s_id);
2859         goto failed_mount;
2860
2861 failed_mount4:
2862         printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2863         if (sbi->s_journal) {
2864                 jbd2_journal_destroy(sbi->s_journal);
2865                 sbi->s_journal = NULL;
2866         }
2867 failed_mount3:
2868         if (sbi->s_flex_groups) {
2869                 if (is_vmalloc_addr(sbi->s_flex_groups))
2870                         vfree(sbi->s_flex_groups);
2871                 else
2872                         kfree(sbi->s_flex_groups);
2873         }
2874         percpu_counter_destroy(&sbi->s_freeblocks_counter);
2875         percpu_counter_destroy(&sbi->s_freeinodes_counter);
2876         percpu_counter_destroy(&sbi->s_dirs_counter);
2877         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2878 failed_mount2:
2879         for (i = 0; i < db_count; i++)
2880                 brelse(sbi->s_group_desc[i]);
2881         kfree(sbi->s_group_desc);
2882 failed_mount:
2883         if (sbi->s_proc) {
2884                 remove_proc_entry(sb->s_id, ext4_proc_root);
2885         }
2886 #ifdef CONFIG_QUOTA
2887         for (i = 0; i < MAXQUOTAS; i++)
2888                 kfree(sbi->s_qf_names[i]);
2889 #endif
2890         ext4_blkdev_remove(sbi);
2891         brelse(bh);
2892 out_fail:
2893         sb->s_fs_info = NULL;
2894         kfree(sbi);
2895         lock_kernel();
2896         return ret;
2897 }
2898
2899 /*
2900  * Setup any per-fs journal parameters now.  We'll do this both on
2901  * initial mount, once the journal has been initialised but before we've
2902  * done any recovery; and again on any subsequent remount.
2903  */
2904 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2905 {
2906         struct ext4_sb_info *sbi = EXT4_SB(sb);
2907
2908         journal->j_commit_interval = sbi->s_commit_interval;
2909         journal->j_min_batch_time = sbi->s_min_batch_time;
2910         journal->j_max_batch_time = sbi->s_max_batch_time;
2911
2912         spin_lock(&journal->j_state_lock);
2913         if (test_opt(sb, BARRIER))
2914                 journal->j_flags |= JBD2_BARRIER;
2915         else
2916                 journal->j_flags &= ~JBD2_BARRIER;
2917         if (test_opt(sb, DATA_ERR_ABORT))
2918                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2919         else
2920                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2921         spin_unlock(&journal->j_state_lock);
2922 }
2923
2924 static journal_t *ext4_get_journal(struct super_block *sb,
2925                                    unsigned int journal_inum)
2926 {
2927         struct inode *journal_inode;
2928         journal_t *journal;
2929
2930         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2931
2932         /* First, test for the existence of a valid inode on disk.  Bad
2933          * things happen if we iget() an unused inode, as the subsequent
2934          * iput() will try to delete it. */
2935
2936         journal_inode = ext4_iget(sb, journal_inum);
2937         if (IS_ERR(journal_inode)) {
2938                 printk(KERN_ERR "EXT4-fs: no journal found.\n");
2939                 return NULL;
2940         }
2941         if (!journal_inode->i_nlink) {
2942                 make_bad_inode(journal_inode);
2943                 iput(journal_inode);
2944                 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2945                 return NULL;
2946         }
2947
2948         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2949                   journal_inode, journal_inode->i_size);
2950         if (!S_ISREG(journal_inode->i_mode)) {
2951                 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2952                 iput(journal_inode);
2953                 return NULL;
2954         }
2955
2956         journal = jbd2_journal_init_inode(journal_inode);
2957         if (!journal) {
2958                 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2959                 iput(journal_inode);
2960                 return NULL;
2961         }
2962         journal->j_private = sb;
2963         ext4_init_journal_params(sb, journal);
2964         return journal;
2965 }
2966
2967 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2968                                        dev_t j_dev)
2969 {
2970         struct buffer_head *bh;
2971         journal_t *journal;
2972         ext4_fsblk_t start;
2973         ext4_fsblk_t len;
2974         int hblock, blocksize;
2975         ext4_fsblk_t sb_block;
2976         unsigned long offset;
2977         struct ext4_super_block *es;
2978         struct block_device *bdev;
2979
2980         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2981
2982         bdev = ext4_blkdev_get(j_dev);
2983         if (bdev == NULL)
2984                 return NULL;
2985
2986         if (bd_claim(bdev, sb)) {
2987                 printk(KERN_ERR
2988                         "EXT4-fs: failed to claim external journal device.\n");
2989                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2990                 return NULL;
2991         }
2992
2993         blocksize = sb->s_blocksize;
2994         hblock = bdev_hardsect_size(bdev);
2995         if (blocksize < hblock) {
2996                 printk(KERN_ERR
2997                         "EXT4-fs: blocksize too small for journal device.\n");
2998                 goto out_bdev;
2999         }
3000
3001         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3002         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3003         set_blocksize(bdev, blocksize);
3004         if (!(bh = __bread(bdev, sb_block, blocksize))) {
3005                 printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
3006                        "external journal\n");
3007                 goto out_bdev;
3008         }
3009
3010         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3011         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3012             !(le32_to_cpu(es->s_feature_incompat) &
3013               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3014                 printk(KERN_ERR "EXT4-fs: external journal has "
3015                                         "bad superblock\n");
3016                 brelse(bh);
3017                 goto out_bdev;
3018         }
3019
3020         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3021                 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
3022                 brelse(bh);
3023                 goto out_bdev;
3024         }
3025
3026         len = ext4_blocks_count(es);
3027         start = sb_block + 1;
3028         brelse(bh);     /* we're done with the superblock */
3029
3030         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3031                                         start, len, blocksize);
3032         if (!journal) {
3033                 printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
3034                 goto out_bdev;
3035         }
3036         journal->j_private = sb;
3037         ll_rw_block(READ, 1, &journal->j_sb_buffer);
3038         wait_on_buffer(journal->j_sb_buffer);
3039         if (!buffer_uptodate(journal->j_sb_buffer)) {
3040                 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
3041                 goto out_journal;
3042         }
3043         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3044                 printk(KERN_ERR "EXT4-fs: External journal has more than one "
3045                                         "user (unsupported) - %d\n",
3046                         be32_to_cpu(journal->j_superblock->s_nr_users));
3047                 goto out_journal;
3048         }
3049         EXT4_SB(sb)->journal_bdev = bdev;
3050         ext4_init_journal_params(sb, journal);
3051         return journal;
3052 out_journal:
3053         jbd2_journal_destroy(journal);
3054 out_bdev:
3055         ext4_blkdev_put(bdev);
3056         return NULL;
3057 }
3058
3059 static int ext4_load_journal(struct super_block *sb,
3060                              struct ext4_super_block *es,
3061                              unsigned long journal_devnum)
3062 {
3063         journal_t *journal;
3064         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3065         dev_t journal_dev;
3066         int err = 0;
3067         int really_read_only;
3068
3069         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3070
3071         if (journal_devnum &&
3072             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3073                 printk(KERN_INFO "EXT4-fs: external journal device major/minor "
3074                         "numbers have changed\n");
3075                 journal_dev = new_decode_dev(journal_devnum);
3076         } else
3077                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3078
3079         really_read_only = bdev_read_only(sb->s_bdev);
3080
3081         /*
3082          * Are we loading a blank journal or performing recovery after a
3083          * crash?  For recovery, we need to check in advance whether we
3084          * can get read-write access to the device.
3085          */
3086
3087         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3088                 if (sb->s_flags & MS_RDONLY) {
3089                         printk(KERN_INFO "EXT4-fs: INFO: recovery "
3090                                         "required on readonly filesystem.\n");
3091                         if (really_read_only) {
3092                                 printk(KERN_ERR "EXT4-fs: write access "
3093                                         "unavailable, cannot proceed.\n");
3094                                 return -EROFS;
3095                         }
3096                         printk(KERN_INFO "EXT4-fs: write access will "
3097                                "be enabled during recovery.\n");
3098                 }
3099         }
3100
3101         if (journal_inum && journal_dev) {
3102                 printk(KERN_ERR "EXT4-fs: filesystem has both journal "
3103                        "and inode journals!\n");
3104                 return -EINVAL;
3105         }
3106
3107         if (journal_inum) {
3108                 if (!(journal = ext4_get_journal(sb, journal_inum)))
3109                         return -EINVAL;
3110         } else {
3111                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3112                         return -EINVAL;
3113         }
3114
3115         if (journal->j_flags & JBD2_BARRIER)
3116                 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
3117         else
3118                 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
3119
3120         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3121                 err = jbd2_journal_update_format(journal);
3122                 if (err)  {
3123                         printk(KERN_ERR "EXT4-fs: error updating journal.\n");
3124                         jbd2_journal_destroy(journal);
3125                         return err;
3126                 }
3127         }
3128
3129         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3130                 err = jbd2_journal_wipe(journal, !really_read_only);
3131         if (!err)
3132                 err = jbd2_journal_load(journal);
3133
3134         if (err) {
3135                 printk(KERN_ERR "EXT4-fs: error loading journal.\n");
3136                 jbd2_journal_destroy(journal);
3137                 return err;
3138         }
3139
3140         EXT4_SB(sb)->s_journal = journal;
3141         ext4_clear_journal_err(sb, es);
3142
3143         if (journal_devnum &&
3144             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3145                 es->s_journal_dev = cpu_to_le32(journal_devnum);
3146
3147                 /* Make sure we flush the recovery flag to disk. */
3148                 ext4_commit_super(sb, 1);
3149         }
3150
3151         return 0;
3152 }
3153
3154 static int ext4_commit_super(struct super_block *sb, int sync)
3155 {
3156         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3157         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3158         int error = 0;
3159
3160         if (!sbh)
3161                 return error;
3162         if (buffer_write_io_error(sbh)) {
3163                 /*
3164                  * Oh, dear.  A previous attempt to write the
3165                  * superblock failed.  This could happen because the
3166                  * USB device was yanked out.  Or it could happen to
3167                  * be a transient write error and maybe the block will
3168                  * be remapped.  Nothing we can do but to retry the
3169                  * write and hope for the best.
3170                  */
3171                 printk(KERN_ERR "EXT4-fs: previous I/O error to "
3172                        "superblock detected for %s.\n", sb->s_id);
3173                 clear_buffer_write_io_error(sbh);
3174                 set_buffer_uptodate(sbh);
3175         }
3176         es->s_wtime = cpu_to_le32(get_seconds());
3177         es->s_kbytes_written =
3178                 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 
3179                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3180                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
3181         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3182                                         &EXT4_SB(sb)->s_freeblocks_counter));
3183         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3184                                         &EXT4_SB(sb)->s_freeinodes_counter));
3185         sb->s_dirt = 0;
3186         BUFFER_TRACE(sbh, "marking dirty");
3187         mark_buffer_dirty(sbh);
3188         if (sync) {
3189                 error = sync_dirty_buffer(sbh);
3190                 if (error)
3191                         return error;
3192
3193                 error = buffer_write_io_error(sbh);
3194                 if (error) {
3195                         printk(KERN_ERR "EXT4-fs: I/O error while writing "
3196                                "superblock for %s.\n", sb->s_id);
3197                         clear_buffer_write_io_error(sbh);
3198                         set_buffer_uptodate(sbh);
3199                 }
3200         }
3201         return error;
3202 }
3203
3204
3205 /*
3206  * Have we just finished recovery?  If so, and if we are mounting (or
3207  * remounting) the filesystem readonly, then we will end up with a
3208  * consistent fs on disk.  Record that fact.
3209  */
3210 static void ext4_mark_recovery_complete(struct super_block *sb,
3211                                         struct ext4_super_block *es)
3212 {
3213         journal_t *journal = EXT4_SB(sb)->s_journal;
3214
3215         if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3216                 BUG_ON(journal != NULL);
3217                 return;
3218         }
3219         jbd2_journal_lock_updates(journal);
3220         if (jbd2_journal_flush(journal) < 0)
3221                 goto out;
3222
3223         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3224             sb->s_flags & MS_RDONLY) {
3225                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3226                 ext4_commit_super(sb, 1);
3227         }
3228
3229 out:
3230         jbd2_journal_unlock_updates(journal);
3231 }
3232
3233 /*
3234  * If we are mounting (or read-write remounting) a filesystem whose journal
3235  * has recorded an error from a previous lifetime, move that error to the
3236  * main filesystem now.
3237  */
3238 static void ext4_clear_journal_err(struct super_block *sb,
3239                                    struct ext4_super_block *es)
3240 {
3241         journal_t *journal;
3242         int j_errno;
3243         const char *errstr;
3244
3245         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3246
3247         journal = EXT4_SB(sb)->s_journal;
3248
3249         /*
3250          * Now check for any error status which may have been recorded in the
3251          * journal by a prior ext4_error() or ext4_abort()
3252          */
3253
3254         j_errno = jbd2_journal_errno(journal);
3255         if (j_errno) {
3256                 char nbuf[16];
3257
3258                 errstr = ext4_decode_error(sb, j_errno, nbuf);
3259                 ext4_warning(sb, __func__, "Filesystem error recorded "
3260                              "from previous mount: %s", errstr);
3261                 ext4_warning(sb, __func__, "Marking fs in need of "
3262                              "filesystem check.");
3263
3264                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3265                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3266                 ext4_commit_super(sb, 1);
3267
3268                 jbd2_journal_clear_err(journal);
3269         }
3270 }
3271
3272 /*
3273  * Force the running and committing transactions to commit,
3274  * and wait on the commit.
3275  */
3276 int ext4_force_commit(struct super_block *sb)
3277 {
3278         journal_t *journal;
3279         int ret = 0;
3280
3281         if (sb->s_flags & MS_RDONLY)
3282                 return 0;
3283
3284         journal = EXT4_SB(sb)->s_journal;
3285         if (journal)
3286                 ret = ext4_journal_force_commit(journal);
3287
3288         return ret;
3289 }
3290
/* Write the superblock out synchronously; the result is not reported. */
static void ext4_write_super(struct super_block *sb)
{
        (void)ext4_commit_super(sb, 1);
}
3295
3296 static int ext4_sync_fs(struct super_block *sb, int wait)
3297 {
3298         int ret = 0;
3299         tid_t target;
3300
3301         trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3302         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3303                 if (wait)
3304                         jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3305         }
3306         return ret;
3307 }
3308
3309 /*
3310  * LVM calls this function before a (read-only) snapshot is created.  This
3311  * gives us a chance to flush the journal completely and mark the fs clean.
3312  */
3313 static int ext4_freeze(struct super_block *sb)
3314 {
3315         int error = 0;
3316         journal_t *journal;
3317
3318         if (sb->s_flags & MS_RDONLY)
3319                 return 0;
3320
3321         journal = EXT4_SB(sb)->s_journal;
3322
3323         /* Now we set up the journal barrier. */
3324         jbd2_journal_lock_updates(journal);
3325
3326         /*
3327          * Don't clear the needs_recovery flag if we failed to flush
3328          * the journal.
3329          */
3330         error = jbd2_journal_flush(journal);
3331         if (error < 0) {
3332         out:
3333                 jbd2_journal_unlock_updates(journal);
3334                 return error;
3335         }
3336
3337         /* Journal blocked and flushed, clear needs_recovery flag. */
3338         EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3339         error = ext4_commit_super(sb, 1);
3340         if (error)
3341                 goto out;
3342         return 0;
3343 }
3344
3345 /*
3346  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3347  * flag here, even though the filesystem is not technically dirty yet.
3348  */
3349 static int ext4_unfreeze(struct super_block *sb)
3350 {
3351         if (sb->s_flags & MS_RDONLY)
3352                 return 0;
3353
3354         lock_super(sb);
3355         /* Reset the needs_recovery flag before the fs is unlocked. */
3356         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3357         ext4_commit_super(sb, 1);
3358         unlock_super(sb);
3359         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3360         return 0;
3361 }
3362
/*
 * Remount handler.
 *
 * Snapshots the current mount options, parses @data into the sb info,
 * pushes the (possibly changed) journal parameters and I/O priority to the
 * journal, and then handles a read-only <-> read-write transition or a
 * resize request (n_blocks_count larger than the current block count).
 *
 * Returns 0 on success.  On any failure the saved options and sb flags are
 * restored via the restore_opts path and a negative errno is returned.
 */
3363 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3364 {
3365         struct ext4_super_block *es;
3366         struct ext4_sb_info *sbi = EXT4_SB(sb);
3367         ext4_fsblk_t n_blocks_count = 0;
3368         unsigned long old_sb_flags;
3369         struct ext4_mount_options old_opts;
3370         ext4_group_t g;
3371         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3372         int err;
3373 #ifdef CONFIG_QUOTA
3374         int i;
3375 #endif
3376
3377         /* Store the original options */
3378         old_sb_flags = sb->s_flags;
3379         old_opts.s_mount_opt = sbi->s_mount_opt;
3380         old_opts.s_resuid = sbi->s_resuid;
3381         old_opts.s_resgid = sbi->s_resgid;
3382         old_opts.s_commit_interval = sbi->s_commit_interval;
3383         old_opts.s_min_batch_time = sbi->s_min_batch_time;
3384         old_opts.s_max_batch_time = sbi->s_max_batch_time;
3385 #ifdef CONFIG_QUOTA
3386         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3387         for (i = 0; i < MAXQUOTAS; i++)
3388                 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3389 #endif
             /*
              * Seed journal_ioprio with the journal thread's current I/O
              * priority (when it has an io_context) before it is handed to
              * parse_options() below.
              */
3390         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3391                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3392
3393         /*
3394          * Allow the "check" option to be passed as a remount option.
3395          */
3396         if (!parse_options(data, sb, NULL, &journal_ioprio,
3397                            &n_blocks_count, 1)) {
3398                 err = -EINVAL;
3399                 goto restore_opts;
3400         }
3401
3402         if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3403                 ext4_abort(sb, __func__, "Abort forced by user");
3404
             /* Keep MS_POSIXACL in sb->s_flags in step with the mount option. */
3405         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3406                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3407
3408         es = sbi->s_es;
3409
3410         if (sbi->s_journal) {
3411                 ext4_init_journal_params(sb, sbi->s_journal);
3412                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3413         }
3414
             /*
              * Extra work is only needed when the read-only state changes or
              * a larger block count was requested.
              */
3415         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3416                 n_blocks_count > ext4_blocks_count(es)) {
3417                 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3418                         err = -EROFS;
3419                         goto restore_opts;
3420                 }
3421
3422                 if (*flags & MS_RDONLY) {
3423                         /*
3424                          * First of all, the unconditional stuff we have to do
3425                          * to disable replay of the journal when we next remount
3426                          */
3427                         sb->s_flags |= MS_RDONLY;
3428
3429                         /*
3430                          * OK, test if we are remounting a valid rw partition
3431                          * readonly, and if so set the rdonly flag and then
3432                          * mark the partition as valid again.
3433                          */
3434                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3435                             (sbi->s_mount_state & EXT4_VALID_FS))
3436                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
3437
3438                         if (sbi->s_journal)
3439                                 ext4_mark_recovery_complete(sb, es);
3440                 } else {
3441                         int ret;
3442                         if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3443                                         ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3444                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3445                                        "remount RDWR because of unsupported "
3446                                        "optional features (%x).\n", sb->s_id,
3447                                 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3448                                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
3449                                 err = -EROFS;
3450                                 goto restore_opts;
3451                         }
3452
3453                         /*
3454                          * Make sure the group descriptor checksums
3455                          * are sane.  If they aren't, refuse to
3456                          * remount r/w.
3457                          */
3458                         for (g = 0; g < sbi->s_groups_count; g++) {
3459                                 struct ext4_group_desc *gdp =
3460                                         ext4_get_group_desc(sb, g, NULL);
3461
3462                                 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3463                                         printk(KERN_ERR
3464                "EXT4-fs: ext4_remount: "
3465                 "Checksum for group %u failed (%u!=%u)\n",
3466                 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3467                                                le16_to_cpu(gdp->bg_checksum));
3468                                         err = -EINVAL;
3469                                         goto restore_opts;
3470                                 }
3471                         }
3472
3473                         /*
3474                          * If we have an unprocessed orphan list hanging
3475                          * around from a previously readonly bdev mount,
3476                          * require a full umount/remount for now.
3477                          */
3478                         if (es->s_last_orphan) {
3479                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3480                                        "remount RDWR because of unprocessed "
3481                                        "orphan inode list.  Please "
3482                                        "umount/remount instead.\n",
3483                                        sb->s_id);
3484                                 err = -EINVAL;
3485                                 goto restore_opts;
3486                         }
3487
3488                         /*
3489                          * Mounting a RDONLY partition read-write, so reread
3490                          * and store the current valid flag.  (It may have
3491                          * been changed by e2fsck since we originally mounted
3492                          * the partition.)
3493                          */
3494                         if (sbi->s_journal)
3495                                 ext4_clear_journal_err(sb, es);
3496                         sbi->s_mount_state = le16_to_cpu(es->s_state);
3497                         if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3498                                 goto restore_opts;
                             /* MS_RDONLY is dropped only if ext4_setup_super() returns 0. */
3499                         if (!ext4_setup_super(sb, es, 0))
3500                                 sb->s_flags &= ~MS_RDONLY;
3501                 }
3502         }
             /* Without a journal, write the superblock out synchronously here. */
3503         if (sbi->s_journal == NULL)
3504                 ext4_commit_super(sb, 1);
3505
3506 #ifdef CONFIG_QUOTA
3507         /* Release old quota file names */
3508         for (i = 0; i < MAXQUOTAS; i++)
3509                 if (old_opts.s_qf_names[i] &&
3510                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3511                         kfree(old_opts.s_qf_names[i]);
3512 #endif
3513         return 0;
             /*
              * Failure path: put back the sb flags and every option saved at
              * entry, freeing any quota file name that differs from the
              * saved one.
              */
3514 restore_opts:
3515         sb->s_flags = old_sb_flags;
3516         sbi->s_mount_opt = old_opts.s_mount_opt;
3517         sbi->s_resuid = old_opts.s_resuid;
3518         sbi->s_resgid = old_opts.s_resgid;
3519         sbi->s_commit_interval = old_opts.s_commit_interval;
3520         sbi->s_min_batch_time = old_opts.s_min_batch_time;
3521         sbi->s_max_batch_time = old_opts.s_max_batch_time;
3522 #ifdef CONFIG_QUOTA
3523         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3524         for (i = 0; i < MAXQUOTAS; i++) {
3525                 if (sbi->s_qf_names[i] &&
3526                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3527                         kfree(sbi->s_qf_names[i]);
3528                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3529         }
3530 #endif
3531         return err;
3532 }
3533
3534 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3535 {
3536         struct super_block *sb = dentry->d_sb;
3537         struct ext4_sb_info *sbi = EXT4_SB(sb);
3538         struct ext4_super_block *es = sbi->s_es;
3539         u64 fsid;
3540
3541         if (test_opt(sb, MINIX_DF)) {
3542                 sbi->s_overhead_last = 0;
3543         } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3544                 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3545                 ext4_fsblk_t overhead = 0;
3546
3547                 /*
3548                  * Compute the overhead (FS structures).  This is constant
3549                  * for a given filesystem unless the number of block groups
3550                  * changes so we cache the previous value until it does.
3551                  */
3552
3553                 /*
3554                  * All of the blocks before first_data_block are
3555                  * overhead
3556                  */
3557                 overhead = le32_to_cpu(es->s_first_data_block);
3558
3559                 /*
3560                  * Add the overhead attributed to the superblock and
3561                  * block group descriptors.  If the sparse superblocks
3562                  * feature is turned on, then not all groups have this.
3563                  */
3564                 for (i = 0; i < ngroups; i++) {
3565                         overhead += ext4_bg_has_super(sb, i) +
3566                                 ext4_bg_num_gdb(sb, i);
3567                         cond_resched();
3568                 }
3569
3570                 /*
3571                  * Every block group has an inode bitmap, a block
3572                  * bitmap, and an inode table.
3573                  */
3574                 overhead += ngroups * (2 + sbi->s_itb_per_group);
3575                 sbi->s_overhead_last = overhead;
3576                 smp_wmb();
3577                 sbi->s_blocks_last = ext4_blocks_count(es);
3578         }
3579
3580         buf->f_type = EXT4_SUPER_MAGIC;
3581         buf->f_bsize = sb->s_blocksize;
3582         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3583         buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3584                        percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3585         ext4_free_blocks_count_set(es, buf->f_bfree);
3586         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3587         if (buf->f_bfree < ext4_r_blocks_count(es))
3588                 buf->f_bavail = 0;
3589         buf->f_files = le32_to_cpu(es->s_inodes_count);
3590         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3591         es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3592         buf->f_namelen = EXT4_NAME_LEN;
3593         fsid = le64_to_cpup((void *)es->s_uuid) ^
3594                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3595         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3596         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3597         return 0;
3598 }
3599
3600 /* Helper function for writing quotas on sync - we need to start transaction before quota file
3601  * is locked for write. Otherwise there are possible deadlocks:
3602  * Process 1                         Process 2
3603  * ext4_create()                     quota_sync()
3604  *   jbd2_journal_start()                  write_dquot()
3605  *   vfs_dq_init()                         down(dqio_mutex)
3606  *     down(dqio_mutex)                    jbd2_journal_start()
3607  *
3608  */
3609
3610 #ifdef CONFIG_QUOTA
3611
3612 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3613 {
3614         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3615 }
3616
3617 static int ext4_write_dquot(struct dquot *dquot)
3618 {
3619         int ret, err;
3620         handle_t *handle;
3621         struct inode *inode;
3622
3623         inode = dquot_to_inode(dquot);
3624         handle = ext4_journal_start(inode,
3625                                         EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3626         if (IS_ERR(handle))
3627                 return PTR_ERR(handle);
3628         ret = dquot_commit(dquot);
3629         err = ext4_journal_stop(handle);
3630         if (!ret)
3631                 ret = err;
3632         return ret;
3633 }
3634
3635 static int ext4_acquire_dquot(struct dquot *dquot)
3636 {
3637         int ret, err;
3638         handle_t *handle;
3639
3640         handle = ext4_journal_start(dquot_to_inode(dquot),
3641                                         EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3642         if (IS_ERR(handle))
3643                 return PTR_ERR(handle);
3644         ret = dquot_acquire(dquot);
3645         err = ext4_journal_stop(handle);
3646         if (!ret)
3647                 ret = err;
3648         return ret;
3649 }
3650
3651 static int ext4_release_dquot(struct dquot *dquot)
3652 {
3653         int ret, err;
3654         handle_t *handle;
3655
3656         handle = ext4_journal_start(dquot_to_inode(dquot),
3657                                         EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3658         if (IS_ERR(handle)) {
3659                 /* Release dquot anyway to avoid endless cycle in dqput() */
3660                 dquot_release(dquot);
3661                 return PTR_ERR(handle);
3662         }
3663         ret = dquot_release(dquot);
3664         err = ext4_journal_stop(handle);
3665         if (!ret)
3666                 ret = err;
3667         return ret;
3668 }
3669
3670 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3671 {
3672         /* Are we journaling quotas? */
3673         if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3674             EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3675                 dquot_mark_dquot_dirty(dquot);
3676                 return ext4_write_dquot(dquot);
3677         } else {
3678                 return dquot_mark_dquot_dirty(dquot);
3679         }
3680 }
3681
3682 static int ext4_write_info(struct super_block *sb, int type)
3683 {
3684         int ret, err;
3685         handle_t *handle;
3686
3687         /* Data block + inode block */
3688         handle = ext4_journal_start(sb->s_root->d_inode, 2);
3689         if (IS_ERR(handle))
3690                 return PTR_ERR(handle);
3691         ret = dquot_commit_info(sb, type);
3692         err = ext4_journal_stop(handle);
3693         if (!ret)
3694                 ret = err;
3695         return ret;
3696 }
3697
3698 /*
3699  * Turn on quotas during mount time - we need to find
3700  * the quota file and such...
3701  */
3702 static int ext4_quota_on_mount(struct super_block *sb, int type)
3703 {
3704         return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3705                         EXT4_SB(sb)->s_jquota_fmt, type);
3706 }
3707
3708 /*
3709  * Standard function to be called on quota_on
3710  */
3711 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3712                          char *name, int remount)
3713 {
3714         int err;
3715         struct path path;
3716
3717         if (!test_opt(sb, QUOTA))
3718                 return -EINVAL;
3719         /* When remounting, no checks are needed and in fact, name is NULL */
3720         if (remount)
3721                 return vfs_quota_on(sb, type, format_id, name, remount);
3722
3723         err = kern_path(name, LOOKUP_FOLLOW, &path);
3724         if (err)
3725                 return err;
3726
3727         /* Quotafile not on the same filesystem? */
3728         if (path.mnt->mnt_sb != sb) {
3729                 path_put(&path);
3730                 return -EXDEV;
3731         }
3732         /* Journaling quota? */
3733         if (EXT4_SB(sb)->s_qf_names[type]) {
3734                 /* Quotafile not in fs root? */
3735                 if (path.dentry->d_parent != sb->s_root)
3736                         printk(KERN_WARNING
3737                                 "EXT4-fs: Quota file not on filesystem root. "
3738                                 "Journaled quota will not work.\n");
3739         }
3740
3741         /*
3742          * When we journal data on quota file, we have to flush journal to see
3743          * all updates to the file when we bypass pagecache...
3744          */
3745         if (EXT4_SB(sb)->s_journal &&
3746             ext4_should_journal_data(path.dentry->d_inode)) {
3747                 /*
3748                  * We don't need to lock updates but journal_flush() could
3749                  * otherwise be livelocked...
3750                  */
3751                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3752                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3753                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3754                 if (err) {
3755                         path_put(&path);
3756                         return err;
3757                 }
3758         }
3759
3760         err = vfs_quota_on_path(sb, type, format_id, &path);
3761         path_put(&path);
3762         return err;
3763 }
3764
3765 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3766  * acquiring the locks... As quota files are never truncated and quota code
3767  * itself serializes the operations (and noone else should touch the files)
3768  * we don't have to be afraid of races */
3769 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3770                                size_t len, loff_t off)
3771 {
3772         struct inode *inode = sb_dqopt(sb)->files[type];
3773         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3774         int err = 0;
3775         int offset = off & (sb->s_blocksize - 1);
3776         int tocopy;
3777         size_t toread;
3778         struct buffer_head *bh;
3779         loff_t i_size = i_size_read(inode);
3780
3781         if (off > i_size)
3782                 return 0;
3783         if (off+len > i_size)
3784                 len = i_size-off;
3785         toread = len;
3786         while (toread > 0) {
3787                 tocopy = sb->s_blocksize - offset < toread ?
3788                                 sb->s_blocksize - offset : toread;
3789                 bh = ext4_bread(NULL, inode, blk, 0, &err);
3790                 if (err)
3791                         return err;
3792                 if (!bh)        /* A hole? */
3793                         memset(data, 0, tocopy);
3794                 else
3795                         memcpy(data, bh->b_data+offset, tocopy);
3796                 brelse(bh);
3797                 offset = 0;
3798                 toread -= tocopy;
3799                 data += tocopy;
3800                 blk++;
3801         }
3802         return len;
3803 }
3804
3805 /* Write to quotafile (we know the transaction is already started and has
3806  * enough credits) */
3807 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3808                                 const char *data, size_t len, loff_t off)
3809 {
3810         struct inode *inode = sb_dqopt(sb)->files[type];
3811         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3812         int err = 0;
3813         int offset = off & (sb->s_blocksize - 1);
3814         int tocopy;
3815         int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3816         size_t towrite = len;
3817         struct buffer_head *bh;
3818         handle_t *handle = journal_current_handle();
3819
3820         if (EXT4_SB(sb)->s_journal && !handle) {
3821                 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3822                         " cancelled because transaction is not started.\n",
3823                         (unsigned long long)off, (unsigned long long)len);
3824                 return -EIO;
3825         }
3826         mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3827         while (towrite > 0) {
3828                 tocopy = sb->s_blocksize - offset < towrite ?
3829                                 sb->s_blocksize - offset : towrite;
3830                 bh = ext4_bread(handle, inode, blk, 1, &err);
3831                 if (!bh)
3832                         goto out;
3833                 if (journal_quota) {
3834                         err = ext4_journal_get_write_access(handle, bh);
3835                         if (err) {
3836                                 brelse(bh);
3837                                 goto out;
3838                         }
3839                 }
3840                 lock_buffer(bh);
3841                 memcpy(bh->b_data+offset, data, tocopy);
3842                 flush_dcache_page(bh->b_page);
3843                 unlock_buffer(bh);
3844                 if (journal_quota)
3845                         err = ext4_handle_dirty_metadata(handle, NULL, bh);
3846                 else {
3847                         /* Always do at least ordered writes for quotas */
3848                         err = ext4_jbd2_file_inode(handle, inode);
3849                         mark_buffer_dirty(bh);
3850                 }
3851                 brelse(bh);
3852                 if (err)
3853                         goto out;
3854                 offset = 0;
3855                 towrite -= tocopy;
3856                 data += tocopy;
3857                 blk++;
3858         }
3859 out:
3860         if (len == towrite) {
3861                 mutex_unlock(&inode->i_mutex);
3862                 return err;
3863         }
3864         if (inode->i_size < off+len-towrite) {
3865                 i_size_write(inode, off+len-towrite);
3866                 EXT4_I(inode)->i_disksize = inode->i_size;
3867         }
3868         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3869         ext4_mark_inode_dirty(handle, inode);
3870         mutex_unlock(&inode->i_mutex);
3871         return len - towrite;
3872 }
3873
3874 #endif
3875
3876 static int ext4_get_sb(struct file_system_type *fs_type,
3877         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3878 {
3879         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3880 }
3881
3882 static struct file_system_type ext4_fs_type = {
3883         .owner          = THIS_MODULE,
3884         .name           = "ext4",
3885         .get_sb         = ext4_get_sb,
3886         .kill_sb        = kill_block_super,
3887         .fs_flags       = FS_REQUIRES_DEV,
3888 };
3889
3890 #ifdef CONFIG_EXT4DEV_COMPAT
3891 static int ext4dev_get_sb(struct file_system_type *fs_type,
3892         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3893 {
3894         printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3895                "to mount using ext4\n");
3896         printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3897                "will go away by 2.6.31\n");
3898         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3899 }
3900
3901 static struct file_system_type ext4dev_fs_type = {
3902         .owner          = THIS_MODULE,
3903         .name           = "ext4dev",
3904         .get_sb         = ext4dev_get_sb,
3905         .kill_sb        = kill_block_super,
3906         .fs_flags       = FS_REQUIRES_DEV,
3907 };
3908 MODULE_ALIAS("ext4dev");
3909 #endif
3910
3911 static int __init init_ext4_fs(void)
3912 {
3913         int err;
3914
3915         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3916         if (!ext4_kset)
3917                 return -ENOMEM;
3918         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3919         err = init_ext4_mballoc();
3920         if (err)
3921                 return err;
3922
3923         err = init_ext4_xattr();
3924         if (err)
3925                 goto out2;
3926         err = init_inodecache();
3927         if (err)
3928                 goto out1;
3929         err = register_filesystem(&ext4_fs_type);
3930         if (err)
3931                 goto out;
3932 #ifdef CONFIG_EXT4DEV_COMPAT
3933         err = register_filesystem(&ext4dev_fs_type);
3934         if (err) {
3935                 unregister_filesystem(&ext4_fs_type);
3936                 goto out;
3937         }
3938 #endif
3939         return 0;
3940 out:
3941         destroy_inodecache();
3942 out1:
3943         exit_ext4_xattr();
3944 out2:
3945         exit_ext4_mballoc();
3946         return err;
3947 }
3948
3949 static void __exit exit_ext4_fs(void)
3950 {
3951         unregister_filesystem(&ext4_fs_type);
3952 #ifdef CONFIG_EXT4DEV_COMPAT
3953         unregister_filesystem(&ext4dev_fs_type);
3954 #endif
3955         destroy_inodecache();
3956         exit_ext4_xattr();
3957         exit_ext4_mballoc();
3958         remove_proc_entry("fs/ext4", NULL);
3959         kset_unregister(ext4_kset);
3960 }
3961
3962 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3963 MODULE_DESCRIPTION("Fourth Extended Filesystem");
3964 MODULE_LICENSE("GPL");
3965 module_init(init_ext4_fs)
3966 module_exit(exit_ext4_fs)