xfs: remove duplicated #include
[safe/jmp/linux-2.6] / fs / fat / inode.c
index 932c8d6..0ce143b 100644 (file)
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
-#include <linux/msdos_fs.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/buffer_head.h>
+#include <linux/exportfs.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
 #include <asm/unaligned.h>
+#include "fat.h"
 
 #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
 /* if user don't select VFAT, this is undefined. */
@@ -49,40 +54,69 @@ static int fat_add_cluster(struct inode *inode)
        return err;
 }
 
-static int fat_get_block(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create)
+static inline int __fat_get_block(struct inode *inode, sector_t iblock,
+                                 unsigned long *max_blocks,
+                                 struct buffer_head *bh_result, int create)
 {
        struct super_block *sb = inode->i_sb;
+       struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       unsigned long mapped_blocks;
        sector_t phys;
-       int err;
+       int err, offset;
 
-       err = fat_bmap(inode, iblock, &phys);
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
        if (err)
                return err;
        if (phys) {
                map_bh(bh_result, sb, phys);
+               *max_blocks = min(mapped_blocks, *max_blocks);
                return 0;
        }
        if (!create)
                return 0;
+
        if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
-               fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
-                            MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
+               fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",
+                       MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
                return -EIO;
        }
-       if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) {
+
+       offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
+       if (!offset) {
+               /* TODO: multiple cluster allocation would be desirable. */
                err = fat_add_cluster(inode);
                if (err)
                        return err;
        }
-       MSDOS_I(inode)->mmu_private += sb->s_blocksize;
-       err = fat_bmap(inode, iblock, &phys);
+       /* available blocks on this cluster */
+       mapped_blocks = sbi->sec_per_clus - offset;
+
+       *max_blocks = min(mapped_blocks, *max_blocks);
+       MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
        if (err)
                return err;
-       if (!phys)
-               BUG();
+
+       BUG_ON(!phys);
+       BUG_ON(*max_blocks != mapped_blocks);
        set_buffer_new(bh_result);
        map_bh(bh_result, sb, phys);
+
+       return 0;
+}
+
+static int fat_get_block(struct inode *inode, sector_t iblock,
+                        struct buffer_head *bh_result, int create)
+{
+       struct super_block *sb = inode->i_sb;
+       unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+       int err;
+
+       err = __fat_get_block(inode, iblock, &max_blocks, bh_result, create);
+       if (err)
+               return err;
+       bh_result->b_size = max_blocks << sb->s_blocksize_bits;
        return 0;
 }
 
@@ -108,19 +142,24 @@ static int fat_readpages(struct file *file, struct address_space *mapping,
        return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
 }
 
-static int fat_prepare_write(struct file *file, struct page *page,
-                            unsigned from, unsigned to)
+static int fat_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata)
 {
-       return cont_prepare_write(page, from, to, fat_get_block,
-                                 &MSDOS_I(page->mapping->host)->mmu_private);
+       *pagep = NULL;
+       return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                               fat_get_block,
+                               &MSDOS_I(mapping->host)->mmu_private);
 }
 
-static int fat_commit_write(struct file *file, struct page *page,
-                           unsigned from, unsigned to)
+static int fat_write_end(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned copied,
+                       struct page *pagep, void *fsdata)
 {
-       struct inode *inode = page->mapping->host;
-       int err = generic_commit_write(file, page, from, to);
-       if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+       struct inode *inode = mapping->host;
+       int err;
+       err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+       if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
                inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
                MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
                mark_inode_dirty(inode);
@@ -128,19 +167,57 @@ static int fat_commit_write(struct file *file, struct page *page,
        return err;
 }
 
+static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
+                            const struct iovec *iov,
+                            loff_t offset, unsigned long nr_segs)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+
+       if (rw == WRITE) {
+               /*
+                * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
+                * so we need to update the ->mmu_private to block boundary.
+                *
+                * But we must fill the remaining area or hole by nul for
+                * updating ->mmu_private.
+                *
+                * Return 0, and fallback to normal buffered write.
+                */
+               loff_t size = offset + iov_length(iov, nr_segs);
+               if (MSDOS_I(inode)->mmu_private < size)
+                       return 0;
+       }
+
+       /*
+        * FAT need to use the DIO_LOCKING for avoiding the race
+        * condition of fat_get_block() and ->truncate().
+        */
+       return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+                                 offset, nr_segs, fat_get_block, NULL);
+}
+
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
-       return generic_block_bmap(mapping, block, fat_get_block);
+       sector_t blocknr;
+
+       /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
+       down_read(&mapping->host->i_alloc_sem);
+       blocknr = generic_block_bmap(mapping, block, fat_get_block);
+       up_read(&mapping->host->i_alloc_sem);
+
+       return blocknr;
 }
 
-static struct address_space_operations fat_aops = {
+static const struct address_space_operations fat_aops = {
        .readpage       = fat_readpage,
        .readpages      = fat_readpages,
        .writepage      = fat_writepage,
        .writepages     = fat_writepages,
        .sync_page      = block_sync_page,
-       .prepare_write  = fat_prepare_write,
-       .commit_write   = fat_commit_write,
+       .write_begin    = fat_write_begin,
+       .write_end      = fat_write_end,
+       .direct_IO      = fat_direct_IO,
        .bmap           = _fat_bmap
 };
 
@@ -178,25 +255,21 @@ static void fat_hash_init(struct super_block *sb)
                INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
 }
 
-static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos)
+static inline unsigned long fat_hash(loff_t i_pos)
 {
-       unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb;
-       tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
-       return tmp & FAT_HASH_MASK;
+       return hash_32(i_pos, FAT_HASH_BITS);
 }
 
 void fat_attach(struct inode *inode, loff_t i_pos)
 {
-       struct super_block *sb = inode->i_sb;
-       struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+       struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
 
        spin_lock(&sbi->inode_hash_lock);
        MSDOS_I(inode)->i_pos = i_pos;
-       hlist_add_head(&MSDOS_I(inode)->i_fat_hash,
-                       sbi->inode_hashtable + fat_hash(sb, i_pos));
+       hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
        spin_unlock(&sbi->inode_hash_lock);
 }
-
 EXPORT_SYMBOL_GPL(fat_attach);
 
 void fat_detach(struct inode *inode)
@@ -207,13 +280,12 @@ void fat_detach(struct inode *inode)
        hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
        spin_unlock(&sbi->inode_hash_lock);
 }
-
 EXPORT_SYMBOL_GPL(fat_detach);
 
 struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
-       struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos);
+       struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
        struct hlist_node *_p;
        struct msdos_inode_info *i;
        struct inode *inode = NULL;
@@ -272,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 
        if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
                inode->i_generation &= ~1;
-               inode->i_mode = MSDOS_MKMODE(de->attr,
-                       S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+               inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
                inode->i_op = sbi->dir_ops;
                inode->i_fop = &fat_dir_operations;
 
@@ -290,11 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                inode->i_nlink = fat_subdirs(inode);
        } else { /* not a directory */
                inode->i_generation |= 1;
-               inode->i_mode = MSDOS_MKMODE(de->attr,
-                   ((sbi->options.showexec &&
-                       !is_exec(de->ext))
-                       ? S_IRUGO|S_IWUGO : S_IRWXUGO)
-                   & ~sbi->options.fs_fmask) | S_IFREG;
+               inode->i_mode = fat_make_mode(sbi, de->attr,
+                       ((sbi->options.showexec && !is_exec(de->name + 8))
+                        ? S_IRUGO|S_IWUGO : S_IRWXUGO));
                MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
                if (sbi->fat_bits == 32)
                        MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
@@ -310,24 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                if (sbi->options.sys_immutable)
                        inode->i_flags |= S_IMMUTABLE;
        }
-       MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
-       /* this is as close to the truth as we can get ... */
-       inode->i_blksize = sbi->cluster_size;
+       fat_save_attrs(inode, de->attr);
+
        inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
-       inode->i_mtime.tv_sec =
-               date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
-       inode->i_mtime.tv_nsec = 0;
+
+       fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
        if (sbi->options.isvfat) {
-               int secs = de->ctime_cs / 100;
-               int csecs = de->ctime_cs % 100;
-               inode->i_ctime.tv_sec  =
-                       date_dos2unix(le16_to_cpu(de->ctime),
-                                     le16_to_cpu(de->cdate)) + secs;
-               inode->i_ctime.tv_nsec = csecs * 10000000;
-               inode->i_atime.tv_sec =
-                       date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate));
-               inode->i_atime.tv_nsec = 0;
+               fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
+                                 de->cdate, de->ctime_cs);
+               fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
        } else
                inode->i_ctime = inode->i_atime = inode->i_mtime;
 
@@ -367,64 +428,70 @@ EXPORT_SYMBOL_GPL(fat_build_inode);
 static void fat_delete_inode(struct inode *inode)
 {
        truncate_inode_pages(&inode->i_data, 0);
-
-       if (!is_bad_inode(inode)) {
-               inode->i_size = 0;
-               fat_truncate(inode);
-       }
+       inode->i_size = 0;
+       fat_truncate(inode);
        clear_inode(inode);
 }
 
 static void fat_clear_inode(struct inode *inode)
 {
-       struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
-
-       if (is_bad_inode(inode))
-               return;
-       lock_kernel();
-       spin_lock(&sbi->inode_hash_lock);
        fat_cache_inval_inode(inode);
-       hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
-       spin_unlock(&sbi->inode_hash_lock);
-       unlock_kernel();
+       fat_detach(inode);
 }
 
 static void fat_write_super(struct super_block *sb)
 {
+       lock_super(sb);
        sb->s_dirt = 0;
 
        if (!(sb->s_flags & MS_RDONLY))
                fat_clusters_flush(sb);
+       unlock_super(sb);
+}
+
+static int fat_sync_fs(struct super_block *sb, int wait)
+{
+       int err = 0;
+
+       if (sb->s_dirt) {
+               lock_super(sb);
+               sb->s_dirt = 0;
+               err = fat_clusters_flush(sb);
+               unlock_super(sb);
+       }
+
+       return err;
 }
 
 static void fat_put_super(struct super_block *sb)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
-       if (sbi->nls_disk) {
-               unload_nls(sbi->nls_disk);
-               sbi->nls_disk = NULL;
-               sbi->options.codepage = fat_default_codepage;
-       }
-       if (sbi->nls_io) {
-               unload_nls(sbi->nls_io);
-               sbi->nls_io = NULL;
-       }
-       if (sbi->options.iocharset != fat_default_iocharset) {
+       lock_kernel();
+
+       if (sb->s_dirt)
+               fat_write_super(sb);
+
+       iput(sbi->fat_inode);
+
+       unload_nls(sbi->nls_disk);
+       unload_nls(sbi->nls_io);
+
+       if (sbi->options.iocharset != fat_default_iocharset)
                kfree(sbi->options.iocharset);
-               sbi->options.iocharset = fat_default_iocharset;
-       }
 
        sb->s_fs_info = NULL;
        kfree(sbi);
+
+       unlock_kernel();
 }
 
-static kmem_cache_t *fat_inode_cachep;
+static struct kmem_cache *fat_inode_cachep;
 
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
        struct msdos_inode_info *ei;
-       ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL);
+       ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
        return &ei->vfs_inode;
@@ -435,27 +502,25 @@ static void fat_destroy_inode(struct inode *inode)
        kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo)
 {
        struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
 
-       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-           SLAB_CTOR_CONSTRUCTOR) {
-               spin_lock_init(&ei->cache_lru_lock);
-               ei->nr_caches = 0;
-               ei->cache_valid_id = FAT_CACHE_VALID + 1;
-               INIT_LIST_HEAD(&ei->cache_lru);
-               INIT_HLIST_NODE(&ei->i_fat_hash);
-               inode_init_once(&ei->vfs_inode);
-       }
+       spin_lock_init(&ei->cache_lru_lock);
+       ei->nr_caches = 0;
+       ei->cache_valid_id = FAT_CACHE_VALID + 1;
+       INIT_LIST_HEAD(&ei->cache_lru);
+       INIT_HLIST_NODE(&ei->i_fat_hash);
+       inode_init_once(&ei->vfs_inode);
 }
 
 static int __init fat_init_inodecache(void)
 {
        fat_inode_cachep = kmem_cache_create("fat_inode_cache",
                                             sizeof(struct msdos_inode_info),
-                                            0, SLAB_RECLAIM_ACCOUNT,
-                                            init_once, NULL);
+                                            0, (SLAB_RECLAIM_ACCOUNT|
+                                               SLAB_MEM_SPREAD),
+                                            init_once);
        if (fat_inode_cachep == NULL)
                return -ENOMEM;
        return 0;
@@ -463,8 +528,7 @@ static int __init fat_init_inodecache(void)
 
 static void __exit fat_destroy_inodecache(void)
 {
-       if (kmem_cache_destroy(fat_inode_cachep))
-               printk(KERN_INFO "fat_inode_cache: not all structures were freed\n");
+       kmem_cache_destroy(fat_inode_cachep);
 }
 
 static int fat_remount(struct super_block *sb, int *flags, char *data)
@@ -474,54 +538,72 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
        return 0;
 }
 
-static int fat_statfs(struct super_block *sb, struct kstatfs *buf)
+static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+       struct super_block *sb = dentry->d_sb;
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
        /* If the count of free cluster is still unknown, counts it here. */
-       if (sbi->free_clusters == -1) {
-               int err = fat_count_free_clusters(sb);
+       if (sbi->free_clusters == -1 || !sbi->free_clus_valid) {
+               int err = fat_count_free_clusters(dentry->d_sb);
                if (err)
                        return err;
        }
 
-       buf->f_type = sb->s_magic;
+       buf->f_type = dentry->d_sb->s_magic;
        buf->f_bsize = sbi->cluster_size;
        buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
        buf->f_bfree = sbi->free_clusters;
        buf->f_bavail = sbi->free_clusters;
-       buf->f_namelen = sbi->options.isvfat ? 260 : 12;
+       buf->f_fsid.val[0] = (u32)id;
+       buf->f_fsid.val[1] = (u32)(id >> 32);
+       buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12;
 
        return 0;
 }
 
-static int fat_write_inode(struct inode *inode, int wait)
+static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
+                                   struct inode *inode)
+{
+       loff_t i_pos;
+#if BITS_PER_LONG == 32
+       spin_lock(&sbi->inode_hash_lock);
+#endif
+       i_pos = MSDOS_I(inode)->i_pos;
+#if BITS_PER_LONG == 32
+       spin_unlock(&sbi->inode_hash_lock);
+#endif
+       return i_pos;
+}
+
+static int __fat_write_inode(struct inode *inode, int wait)
 {
        struct super_block *sb = inode->i_sb;
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
        struct buffer_head *bh;
        struct msdos_dir_entry *raw_entry;
        loff_t i_pos;
-       int err = 0;
+       int err;
+
+       if (inode->i_ino == MSDOS_ROOT_INO)
+               return 0;
 
 retry:
-       i_pos = MSDOS_I(inode)->i_pos;
-       if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
+       i_pos = fat_i_pos_read(sbi, inode);
+       if (!i_pos)
                return 0;
 
-       lock_kernel();
        bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
        if (!bh) {
                printk(KERN_ERR "FAT: unable to read inode block "
                       "for updating (i_pos %lld)\n", i_pos);
-               err = -EIO;
-               goto out;
+               return -EIO;
        }
        spin_lock(&sbi->inode_hash_lock);
        if (i_pos != MSDOS_I(inode)->i_pos) {
                spin_unlock(&sbi->inode_hash_lock);
                brelse(bh);
-               unlock_kernel();
                goto retry;
        }
 
@@ -531,48 +613,52 @@ retry:
                raw_entry->size = 0;
        else
                raw_entry->size = cpu_to_le32(inode->i_size);
-       raw_entry->attr = fat_attr(inode);
+       raw_entry->attr = fat_make_attrs(inode);
        raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
        raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-       fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+       fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
+                         &raw_entry->date, NULL);
        if (sbi->options.isvfat) {
                __le16 atime;
-               fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
-               fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
-               raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
-                       inode->i_ctime.tv_nsec / 10000000;
+               fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
+                                 &raw_entry->cdate, &raw_entry->ctime_cs);
+               fat_time_unix2fat(sbi, &inode->i_atime, &atime,
+                                 &raw_entry->adate, NULL);
        }
        spin_unlock(&sbi->inode_hash_lock);
        mark_buffer_dirty(bh);
+       err = 0;
        if (wait)
                err = sync_dirty_buffer(bh);
        brelse(bh);
-out:
-       unlock_kernel();
        return err;
 }
 
+static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+       return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int fat_sync_inode(struct inode *inode)
 {
-       return fat_write_inode(inode, 1);
+       return __fat_write_inode(inode, 1);
 }
 
 EXPORT_SYMBOL_GPL(fat_sync_inode);
 
 static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
-static struct super_operations fat_sops = {
+static const struct super_operations fat_sops = {
        .alloc_inode    = fat_alloc_inode,
        .destroy_inode  = fat_destroy_inode,
        .write_inode    = fat_write_inode,
        .delete_inode   = fat_delete_inode,
        .put_super      = fat_put_super,
        .write_super    = fat_write_super,
+       .sync_fs        = fat_sync_fs,
        .statfs         = fat_statfs,
        .clear_inode    = fat_clear_inode,
        .remount_fs     = fat_remount,
 
-       .read_inode     = make_bad_inode,
-
        .show_options   = fat_show_options,
 };
 
@@ -590,27 +676,18 @@ static struct super_operations fat_sops = {
  * of i_logstart is used to store the directory entry offset.
  */
 
-static struct dentry *
-fat_decode_fh(struct super_block *sb, __u32 *fh, int len, int fhtype,
-             int (*acceptable)(void *context, struct dentry *de),
-             void *context)
-{
-       if (fhtype != 3)
-               return ERR_PTR(-ESTALE);
-       if (len < 5)
-               return ERR_PTR(-ESTALE);
-
-       return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, context);
-}
-
-static struct dentry *fat_get_dentry(struct super_block *sb, void *inump)
+static struct dentry *fat_fh_to_dentry(struct super_block *sb,
+               struct fid *fid, int fh_len, int fh_type)
 {
        struct inode *inode = NULL;
        struct dentry *result;
-       __u32 *fh = inump;
+       u32 *fh = fid->raw;
 
-       inode = iget(sb, fh[0]);
-       if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) {
+       if (fh_len < 5 || fh_type != 3)
+               return NULL;
+
+       inode = ilookup(sb, fh[0]);
+       if (!inode || inode->i_generation != fh[1]) {
                if (inode)
                        iput(inode);
                inode = NULL;
@@ -633,33 +710,24 @@ static struct dentry *fat_get_dentry(struct super_block *sb, void *inump)
                        inode = NULL;
                }
        }
-       if (!inode) {
-               /* For now, do nothing
-                * What we could do is:
-                * follow the file starting at fh[4], and record
-                * the ".." entry, and the name of the fh[2] entry.
-                * The follow the ".." file finding the next step up.
-                * This way we build a path to the root of
-                * the tree. If this works, we lookup the path and so
-                * get this inode into the cache.
-                * Finally try the fat_iget lookup again
-                * If that fails, then weare totally out of luck
-                * But all that is for another day
-                */
-       }
-       if (!inode)
-               return ERR_PTR(-ESTALE);
-
 
-       /* now to find a dentry.
-        * If possible, get a well-connected one
+       /*
+        * For now, do nothing if the inode is not found.
+        *
+        * What we could do is:
+        *
+        *      - follow the file starting at fh[4], and record the ".." entry,
+        *        and the name of the fh[2] entry.
+        *      - then follow the ".." file finding the next step up.
+        *
+        * This way we build a path to the root of the tree. If this works, we
+        * lookup the path and so get this inode into the cache.  Finally try
+        * the fat_iget lookup again.  If that fails, then we are totally out
+        * of luck.  But all that is for another day
         */
-       result = d_alloc_anon(inode);
-       if (result == NULL) {
-               iput(inode);
-               return ERR_PTR(-ENOMEM);
-       }
-       result->d_op = sb->s_root->d_op;
+       result = d_obtain_alias(inode);
+       if (!IS_ERR(result))
+               result->d_op = sb->s_root->d_op;
        return result;
 }
 
@@ -689,6 +757,7 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 
 static struct dentry *fat_get_parent(struct dentry *child)
 {
+       struct super_block *sb = child->d_sb;
        struct buffer_head *bh;
        struct msdos_dir_entry *de;
        loff_t i_pos;
@@ -696,34 +765,28 @@ static struct dentry *fat_get_parent(struct dentry *child)
        struct inode *inode;
        int err;
 
-       lock_kernel();
+       lock_super(sb);
 
        err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos);
        if (err) {
                parent = ERR_PTR(err);
                goto out;
        }
-       inode = fat_build_inode(child->d_sb, de, i_pos);
+       inode = fat_build_inode(sb, de, i_pos);
        brelse(bh);
-       if (IS_ERR(inode)) {
-               parent = ERR_PTR(PTR_ERR(inode));
-               goto out;
-       }
-       parent = d_alloc_anon(inode);
-       if (!parent) {
-               iput(inode);
-               parent = ERR_PTR(-ENOMEM);
-       }
+
+       parent = d_obtain_alias(inode);
+       if (!IS_ERR(parent))
+               parent->d_op = sb->s_root->d_op;
 out:
-       unlock_kernel();
+       unlock_super(sb);
 
        return parent;
 }
 
-static struct export_operations fat_export_ops = {
-       .decode_fh      = fat_decode_fh,
+static const struct export_operations fat_export_ops = {
        .encode_fh      = fat_encode_fh,
-       .get_dentry     = fat_get_dentry,
+       .fh_to_dentry   = fat_fh_to_dentry,
        .get_parent     = fat_get_parent,
 };
 
@@ -739,6 +802,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
                seq_printf(m, ",gid=%u", opts->fs_gid);
        seq_printf(m, ",fmask=%04o", opts->fs_fmask);
        seq_printf(m, ",dmask=%04o", opts->fs_dmask);
+       if (opts->allow_utime)
+               seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
        if (sbi->nls_disk)
                seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
        if (isvfat) {
@@ -756,7 +821,7 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
                        seq_puts(m, ",shortname=mixed");
                        break;
                case VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95:
-                       /* seq_puts(m, ",shortname=lower"); */
+                       seq_puts(m, ",shortname=lower");
                        break;
                default:
                        seq_puts(m, ",shortname=unknown");
@@ -765,6 +830,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
        }
        if (opts->name_check != 'n')
                seq_printf(m, ",check=%c", opts->name_check);
+       if (opts->usefree)
+               seq_puts(m, ",usefree");
        if (opts->quiet)
                seq_puts(m, ",quiet");
        if (opts->showexec)
@@ -783,23 +850,38 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
                        seq_puts(m, ",uni_xlate");
                if (!opts->numtail)
                        seq_puts(m, ",nonumtail");
+               if (opts->rodir)
+                       seq_puts(m, ",rodir");
        }
+       if (opts->flush)
+               seq_puts(m, ",flush");
+       if (opts->tz_utc)
+               seq_puts(m, ",tz=UTC");
+       if (opts->errors == FAT_ERRORS_CONT)
+               seq_puts(m, ",errors=continue");
+       else if (opts->errors == FAT_ERRORS_PANIC)
+               seq_puts(m, ",errors=panic");
+       else
+               seq_puts(m, ",errors=remount-ro");
+       if (opts->discard)
+               seq_puts(m, ",discard");
 
        return 0;
 }
 
 enum {
        Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
-       Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_nocase,
-       Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable,
-       Opt_dots, Opt_nodots,
+       Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
+       Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
+       Opt_immutable, Opt_dots, Opt_nodots,
        Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
        Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
        Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-       Opt_obsolate, Opt_err,
+       Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
+       Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
 };
 
-static match_table_t fat_tokens = {
+static const match_table_t fat_tokens = {
        {Opt_check_r, "check=relaxed"},
        {Opt_check_s, "check=strict"},
        {Opt_check_n, "check=normal"},
@@ -811,12 +893,20 @@ static match_table_t fat_tokens = {
        {Opt_umask, "umask=%o"},
        {Opt_dmask, "dmask=%o"},
        {Opt_fmask, "fmask=%o"},
+       {Opt_allow_utime, "allow_utime=%o"},
        {Opt_codepage, "codepage=%u"},
+       {Opt_usefree, "usefree"},
        {Opt_nocase, "nocase"},
        {Opt_quiet, "quiet"},
        {Opt_showexec, "showexec"},
        {Opt_debug, "debug"},
        {Opt_immutable, "sys_immutable"},
+       {Opt_flush, "flush"},
+       {Opt_tz_utc, "tz=UTC"},
+       {Opt_err_cont, "errors=continue"},
+       {Opt_err_panic, "errors=panic"},
+       {Opt_err_ro, "errors=remount-ro"},
+       {Opt_discard, "discard"},
        {Opt_obsolate, "conv=binary"},
        {Opt_obsolate, "conv=text"},
        {Opt_obsolate, "conv=auto"},
@@ -828,16 +918,16 @@ static match_table_t fat_tokens = {
        {Opt_obsolate, "cvf_format=%20s"},
        {Opt_obsolate, "cvf_options=%100s"},
        {Opt_obsolate, "posix"},
-       {Opt_err, NULL}
+       {Opt_err, NULL},
 };
-static match_table_t msdos_tokens = {
+static const match_table_t msdos_tokens = {
        {Opt_nodots, "nodots"},
        {Opt_nodots, "dotsOK=no"},
        {Opt_dots, "dots"},
        {Opt_dots, "dotsOK=yes"},
        {Opt_err, NULL}
 };
-static match_table_t vfat_tokens = {
+static const match_table_t vfat_tokens = {
        {Opt_charset, "iocharset=%s"},
        {Opt_shortname_lower, "shortname=lower"},
        {Opt_shortname_win95, "shortname=win95"},
@@ -864,6 +954,7 @@ static match_table_t vfat_tokens = {
        {Opt_nonumtail_yes, "nonumtail=yes"},
        {Opt_nonumtail_yes, "nonumtail=true"},
        {Opt_nonumtail_yes, "nonumtail"},
+       {Opt_rodir, "rodir"},
        {Opt_err, NULL}
 };
 
@@ -877,24 +968,30 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 
        opts->isvfat = is_vfat;
 
-       opts->fs_uid = current->uid;
-       opts->fs_gid = current->gid;
-       opts->fs_fmask = opts->fs_dmask = current->fs->umask;
+       opts->fs_uid = current_uid();
+       opts->fs_gid = current_gid();
+       opts->fs_fmask = opts->fs_dmask = current_umask();
+       opts->allow_utime = -1;
        opts->codepage = fat_default_codepage;
        opts->iocharset = fat_default_iocharset;
-       if (is_vfat)
-               opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
-       else
+       if (is_vfat) {
+               opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
+               opts->rodir = 0;
+       } else {
                opts->shortname = 0;
+               opts->rodir = 1;
+       }
        opts->name_check = 'n';
        opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK =  0;
        opts->utf8 = opts->unicode_xlate = 0;
        opts->numtail = 1;
-       opts->nocase = 0;
+       opts->usefree = opts->nocase = 0;
+       opts->tz_utc = 0;
+       opts->errors = FAT_ERRORS_RO;
        *debug = 0;
 
        if (!options)
-               return 0;
+               goto out;
 
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
@@ -918,6 +1015,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                case Opt_check_n:
                        opts->name_check = 'n';
                        break;
+               case Opt_usefree:
+                       opts->usefree = 1;
+                       break;
                case Opt_nocase:
                        if (!is_vfat)
                                opts->nocase = 1;
@@ -964,11 +1064,31 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                                return 0;
                        opts->fs_fmask = option;
                        break;
+               case Opt_allow_utime:
+                       if (match_octal(&args[0], &option))
+                               return 0;
+                       opts->allow_utime = option & (S_IWGRP | S_IWOTH);
+                       break;
                case Opt_codepage:
                        if (match_int(&args[0], &option))
                                return 0;
                        opts->codepage = option;
                        break;
+               case Opt_flush:
+                       opts->flush = 1;
+                       break;
+               case Opt_tz_utc:
+                       opts->tz_utc = 1;
+                       break;
+               case Opt_err_cont:
+                       opts->errors = FAT_ERRORS_CONT;
+                       break;
+               case Opt_err_panic:
+                       opts->errors = FAT_ERRORS_PANIC;
+                       break;
+               case Opt_err_ro:
+                       opts->errors = FAT_ERRORS_RO;
+                       break;
 
                /* msdos specific */
                case Opt_dots:
@@ -1021,6 +1141,12 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                case Opt_nonumtail_yes:         /* empty or 1 or yes or true */
                        opts->numtail = 0;      /* negated option */
                        break;
+               case Opt_rodir:
+                       opts->rodir = 1;
+                       break;
+               case Opt_discard:
+                       opts->discard = 1;
+                       break;
 
                /* obsolete mount options */
                case Opt_obsolate:
@@ -1037,12 +1163,18 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                        return -EINVAL;
                }
        }
-       /* UTF8 doesn't provide FAT semantics */
+
+out:
+       /* UTF-8 doesn't provide FAT semantics */
        if (!strcmp(opts->iocharset, "utf8")) {
                printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
-                      " for FAT filesystems, filesystem will be case sensitive!\n");
+                      " for FAT filesystems, filesystem will be "
+                      "case sensitive!\n");
        }
 
+       /* If user doesn't specify allow_utime, it's initialized from dmask. */
+       if (opts->allow_utime == (unsigned short)-1)
+               opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
        if (opts->unicode_xlate)
                opts->utf8 = 0;
 
@@ -1060,7 +1192,7 @@ static int fat_read_root(struct inode *inode)
        inode->i_gid = sbi->options.fs_gid;
        inode->i_version++;
        inode->i_generation = 0;
-       inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+       inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
        inode->i_op = sbi->dir_ops;
        inode->i_fop = &fat_dir_operations;
        if (sbi->fat_bits == 32) {
@@ -1072,13 +1204,12 @@ static int fat_read_root(struct inode *inode)
                MSDOS_I(inode)->i_start = 0;
                inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
        }
-       inode->i_blksize = sbi->cluster_size;
        inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
        MSDOS_I(inode)->i_logstart = 0;
        MSDOS_I(inode)->mmu_private = inode->i_size;
 
-       MSDOS_I(inode)->i_attrs = ATTR_NONE;
+       fat_save_attrs(inode, ATTR_DIR);
        inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
        inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
        inode->i_nlink = fat_subdirs(inode)+2;
@@ -1090,9 +1221,9 @@ static int fat_read_root(struct inode *inode)
  * Read the super block of an MS-DOS FS.
  */
 int fat_fill_super(struct super_block *sb, void *data, int silent,
-                  struct inode_operations *fs_dir_inode_ops, int isvfat)
+                  const struct inode_operations *fs_dir_inode_ops, int isvfat)
 {
-       struct inode *root_inode = NULL;
+       struct inode *root_inode = NULL, *fat_inode = NULL;
        struct buffer_head *bh;
        struct fat_boot_sector *b;
        struct msdos_sb_info *sbi;
@@ -1103,11 +1234,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        long error;
        char buf[50];
 
-       sbi = kmalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
+       /*
+        * GFP_KERNEL is ok here, because while we do hold the
+        * supeblock lock, memory pressure can't call back into
+        * the filesystem, since we're only just about to mount
+        * it and have no inodes etc active!
+        */
+       sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
        sb->s_fs_info = sbi;
-       memset(sbi, 0, sizeof(struct msdos_sb_info));
 
        sb->s_flags |= MS_NODIRATIME;
        sb->s_magic = MSDOS_SUPER_MAGIC;
@@ -1147,19 +1283,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
         */
 
        media = b->media;
-       if (!FAT_VALID_MEDIA(media)) {
+       if (!fat_valid_media(media)) {
                if (!silent)
                        printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
                               media);
                brelse(bh);
                goto out_invalid;
        }
-       logical_sector_size =
-               le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
-       if (!logical_sector_size
-           || (logical_sector_size & (logical_sector_size - 1))
+       logical_sector_size = get_unaligned_le16(&b->sector_size);
+       if (!is_power_of_2(logical_sector_size)
            || (logical_sector_size < 512)
-           || (PAGE_CACHE_SIZE < logical_sector_size)) {
+           || (logical_sector_size > 4096)) {
                if (!silent)
                        printk(KERN_ERR "FAT: bogus logical sector size %u\n",
                               logical_sector_size);
@@ -1167,8 +1301,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
                goto out_invalid;
        }
        sbi->sec_per_clus = b->sec_per_clus;
-       if (!sbi->sec_per_clus
-           || (sbi->sec_per_clus & (sbi->sec_per_clus - 1))) {
+       if (!is_power_of_2(sbi->sec_per_clus)) {
                if (!silent)
                        printk(KERN_ERR "FAT: bogus sectors per cluster %u\n",
                               sbi->sec_per_clus);
@@ -1208,6 +1341,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sbi->fat_length = le16_to_cpu(b->fat_length);
        sbi->root_cluster = 0;
        sbi->free_clusters = -1;        /* Don't know yet */
+       sbi->free_clus_valid = 0;
        sbi->prev_free = FAT_START_ENT;
 
        if (!sbi->fat_length && b->fat32_length) {
@@ -1236,14 +1370,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
 
                fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
                if (!IS_FSINFO(fsinfo)) {
-                       printk(KERN_WARNING
-                              "FAT: Did not find valid FSINFO signature.\n"
-                              "     Found signature1 0x%08x signature2 0x%08x"
-                              " (sector = %lu)\n",
+                       printk(KERN_WARNING "FAT: Invalid FSINFO signature: "
+                              "0x%08x, 0x%08x (sector = %lu)\n",
                               le32_to_cpu(fsinfo->signature1),
                               le32_to_cpu(fsinfo->signature2),
                               sbi->fsinfo_sector);
                } else {
+                       if (sbi->options.usefree)
+                               sbi->free_clus_valid = 1;
                        sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters);
                        sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
                }
@@ -1255,8 +1389,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
 
        sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
-       sbi->dir_entries =
-               le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
+       sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
        if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
                if (!silent)
                        printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1268,7 +1401,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        rootdir_sectors = sbi->dir_entries
                * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
        sbi->data_start = sbi->dir_start + rootdir_sectors;
-       total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors));
+       total_sectors = get_unaligned_le16(&b->sectors);
        if (total_sectors == 0)
                total_sectors = le32_to_cpu(b->total_sect);
 
@@ -1330,6 +1463,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        }
 
        error = -ENOMEM;
+       fat_inode = new_inode(sb);
+       if (!fat_inode)
+               goto out_fail;
+       MSDOS_I(fat_inode)->i_pos = 0;
+       sbi->fat_inode = fat_inode;
        root_inode = new_inode(sb);
        if (!root_inode)
                goto out_fail;
@@ -1355,6 +1493,8 @@ out_invalid:
                       " on dev %s.\n", sb->s_id);
 
 out_fail:
+       if (fat_inode)
+               iput(fat_inode);
        if (root_inode)
                iput(root_inode);
        if (sbi->nls_io)
@@ -1370,8 +1510,55 @@ out_fail:
 
 EXPORT_SYMBOL_GPL(fat_fill_super);
 
-int __init fat_cache_init(void);
-void fat_cache_destroy(void);
+/*
+ * helper function for fat_flush_inodes.  This writes both the inode
+ * and the file data blocks, waiting for in flight data blocks before
+ * the start of the call.  It does not wait for any io started
+ * during the call
+ */
+static int writeback_inode(struct inode *inode)
+{
+
+       int ret;
+       struct address_space *mapping = inode->i_mapping;
+       struct writeback_control wbc = {
+              .sync_mode = WB_SYNC_NONE,
+             .nr_to_write = 0,
+       };
+       /* if we used WB_SYNC_ALL, sync_inode waits for the io for the
+       * inode to finish.  So WB_SYNC_NONE is sent down to sync_inode
+       * and filemap_fdatawrite is used for the data blocks
+       */
+       ret = sync_inode(inode, &wbc);
+       if (!ret)
+              ret = filemap_fdatawrite(mapping);
+       return ret;
+}
+
+/*
+ * write data and metadata corresponding to i1 and i2.  The io is
+ * started but we do not wait for any of it to finish.
+ *
+ * filemap_flush is used for the block device, so if there is a dirty
+ * page for a block already in flight, we will not wait and start the
+ * io over again
+ */
+int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
+{
+       int ret = 0;
+       if (!MSDOS_SB(sb)->options.flush)
+               return 0;
+       if (i1)
+               ret = writeback_inode(i1);
+       if (!ret && i2)
+               ret = writeback_inode(i2);
+       if (!ret) {
+               struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+               ret = filemap_flush(mapping);
+       }
+       return ret;
+}
+EXPORT_SYMBOL_GPL(fat_flush_inodes);
 
 static int __init init_fat_fs(void)
 {