[PATCH] r/o bind mounts: unlink: monitor i_nlink
[safe/jmp/linux-2.6] / mm / shmem.c
index 65c148e..908dd94 100644 (file)
  * which makes it a completely usable filesystem.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/fs.h>
+#include <linux/xattr.h>
+#include <linux/generic_acl.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/file.h>
 #include <linux/swapops.h>
 #include <linux/mempolicy.h>
 #include <linux/namei.h>
+#include <linux/ctype.h>
+#include <linux/migrate.h>
+#include <linux/highmem.h>
+
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/pgtable.h>
@@ -171,10 +175,11 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 }
 
 static struct super_operations shmem_ops;
-static struct address_space_operations shmem_aops;
+static const struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
+static struct inode_operations shmem_special_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
@@ -635,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
        struct page *page = NULL;
        int error;
 
-       if (attr->ia_valid & ATTR_SIZE) {
+       if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
                if (attr->ia_size < inode->i_size) {
                        /*
                         * If truncating down to a partial page, then
@@ -668,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
        error = inode_change_ok(inode, attr);
        if (!error)
                error = inode_setattr(inode, attr);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+       if (!error && (attr->ia_valid & ATTR_MODE))
+               error = generic_acl_chmod(inode, &shmem_acl_ops);
+#endif
        if (page)
                page_cache_release(page);
        return error;
@@ -874,6 +883,51 @@ redirty:
 }
 
 #ifdef CONFIG_NUMA
+static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+{
+       char *nodelist = strchr(value, ':');
+       int err = 1;
+
+       if (nodelist) {
+               /* NUL-terminate policy string */
+               *nodelist++ = '\0';
+               if (nodelist_parse(nodelist, *policy_nodes))
+                       goto out;
+       }
+       if (!strcmp(value, "default")) {
+               *policy = MPOL_DEFAULT;
+               /* Don't allow a nodelist */
+               if (!nodelist)
+                       err = 0;
+       } else if (!strcmp(value, "prefer")) {
+               *policy = MPOL_PREFERRED;
+               /* Insist on a nodelist of one node only */
+               if (nodelist) {
+                       char *rest = nodelist;
+                       while (isdigit(*rest))
+                               rest++;
+                       if (!*rest)
+                               err = 0;
+               }
+       } else if (!strcmp(value, "bind")) {
+               *policy = MPOL_BIND;
+               /* Insist on a nodelist */
+               if (nodelist)
+                       err = 0;
+       } else if (!strcmp(value, "interleave")) {
+               *policy = MPOL_INTERLEAVE;
+               /* Default to nodes online if no nodelist */
+               if (!nodelist)
+                       *policy_nodes = node_online_map;
+               err = 0;
+       }
+out:
+       /* Restore string for error message */
+       if (nodelist)
+               *--nodelist = ':';
+       return err;
+}
+
 static struct page *shmem_swapin_async(struct shared_policy *p,
                                       swp_entry_t entry, unsigned long idx)
 {
@@ -926,6 +980,11 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
        return page;
 }
 #else
+static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+{
+       return 1;
+}
+
 static inline struct page *
 shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 {
@@ -993,12 +1052,12 @@ repeat:
                swappage = lookup_swap_cache(swap);
                if (!swappage) {
                        shmem_swp_unmap(entry);
-                       spin_unlock(&info->lock);
                        /* here we actually do the io */
                        if (type && *type == VM_FAULT_MINOR) {
-                               inc_page_state(pgmajfault);
+                               __count_vm_event(PGMAJFAULT);
                                *type = VM_FAULT_MAJOR;
                        }
+                       spin_unlock(&info->lock);
                        swappage = shmem_swapin(info, swap, idx);
                        if (!swappage) {
                                spin_lock(&info->lock);
@@ -1270,7 +1329,7 @@ out_nomem:
        return retval;
 }
 
-static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
@@ -1299,7 +1358,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                inode->i_mode = mode;
                inode->i_uid = current->fsuid;
                inode->i_gid = current->fsgid;
-               inode->i_blksize = PAGE_CACHE_SIZE;
                inode->i_blocks = 0;
                inode->i_mapping->a_ops = &shmem_aops;
                inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1311,12 +1369,14 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 
                switch (mode & S_IFMT) {
                default:
+                       inode->i_op = &shmem_special_inode_operations;
                        init_special_inode(inode, mode, dev);
                        break;
                case S_IFREG:
                        inode->i_op = &shmem_inode_operations;
                        inode->i_fop = &shmem_file_operations;
-                       mpol_shared_policy_init(&info->policy);
+                       mpol_shared_policy_init(&info->policy, sbinfo->policy,
+                                                       &sbinfo->policy_nodes);
                        break;
                case S_IFDIR:
                        inode->i_nlink++;
@@ -1330,7 +1390,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                         * Must not load anything in the rbtree,
                         * mpol_free_shared_policy will not be called.
                         */
-                       mpol_shared_policy_init(&info->policy);
+                       mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
+                                               NULL);
                        break;
                }
        } else if (sbinfo->max_inodes) {
@@ -1370,7 +1431,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t
        if (!access_ok(VERIFY_READ, buf, count))
                return -EFAULT;
 
-       down(&inode->i_sem);
+       mutex_lock(&inode->i_mutex);
 
        pos = *ppos;
        written = 0;
@@ -1455,7 +1516,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t
        if (written)
                err = written;
 out:
-       up(&inode->i_sem);
+       mutex_unlock(&inode->i_mutex);
        return err;
 }
 
@@ -1491,7 +1552,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
 
                /*
                 * We must evaluate after, since reads (unlike writes)
-                * are called without i_sem protection against truncate
+                * are called without i_mutex protection against truncate
                 */
                nr = PAGE_CACHE_SIZE;
                i_size = i_size_read(inode);
@@ -1591,9 +1652,9 @@ static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
        return desc.error;
 }
 
-static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
+static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-       struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
 
        buf->f_type = TMPFS_MAGIC;
        buf->f_bsize = PAGE_CACHE_SIZE;
@@ -1629,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
                                iput(inode);
                                return error;
                        }
-                       error = 0;
+               }
+               error = shmem_acl_init(inode, dir);
+               if (error) {
+                       iput(inode);
+                       return error;
                }
                if (dir->i_mode & S_ISGID) {
                        inode->i_gid = dir->i_gid;
@@ -1707,7 +1772,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 
        dir->i_size -= BOGO_DIRENT_SIZE;
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-       inode->i_nlink--;
+       drop_nlink(inode);
        dput(dentry);   /* Undo the count from "create" - this does all the work */
        return 0;
 }
@@ -1717,7 +1782,8 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
        if (!simple_empty(dentry))
                return -ENOTEMPTY;
 
-       dir->i_nlink--;
+       drop_nlink(dentry->d_inode);
+       drop_nlink(dir);
        return shmem_unlink(dir, dentry);
 }
 
@@ -1738,9 +1804,9 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
        if (new_dentry->d_inode) {
                (void) shmem_unlink(new_dir, new_dentry);
                if (they_are_dirs)
-                       old_dir->i_nlink--;
+                       drop_nlink(old_dir);
        } else if (they_are_dirs) {
-               old_dir->i_nlink--;
+               drop_nlink(old_dir);
                new_dir->i_nlink++;
        }
 
@@ -1843,11 +1909,76 @@ static struct inode_operations shmem_symlink_inode_operations = {
        .put_link       = shmem_put_link,
 };
 
-static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
+#ifdef CONFIG_TMPFS_POSIX_ACL
+/**
+ * Superblocks without xattr inode operations will get security.* xattr
+ * support from the VFS "for free". As soon as we have any other xattrs
+ * like ACLs, we also need to implement the security.* handlers at
+ * filesystem level, though.
+ */
+
+static size_t shmem_xattr_security_list(struct inode *inode, char *list,
+                                       size_t list_len, const char *name,
+                                       size_t name_len)
+{
+       return security_inode_listsecurity(inode, list, list_len);
+}
+
+static int shmem_xattr_security_get(struct inode *inode, const char *name,
+                                   void *buffer, size_t size)
+{
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+       return security_inode_getsecurity(inode, name, buffer, size,
+                                         -EOPNOTSUPP);
+}
+
+static int shmem_xattr_security_set(struct inode *inode, const char *name,
+                                   const void *value, size_t size, int flags)
+{
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+       return security_inode_setsecurity(inode, name, value, size, flags);
+}
+
+struct xattr_handler shmem_xattr_security_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .list   = shmem_xattr_security_list,
+       .get    = shmem_xattr_security_get,
+       .set    = shmem_xattr_security_set,
+};
+
+static struct xattr_handler *shmem_xattr_handlers[] = {
+       &shmem_xattr_acl_access_handler,
+       &shmem_xattr_acl_default_handler,
+       &shmem_xattr_security_handler,
+       NULL
+};
+#endif
+
+static int shmem_parse_options(char *options, int *mode, uid_t *uid,
+       gid_t *gid, unsigned long *blocks, unsigned long *inodes,
+       int *policy, nodemask_t *policy_nodes)
 {
        char *this_char, *value, *rest;
 
-       while ((this_char = strsep(&options, ",")) != NULL) {
+       while (options != NULL) {
+               this_char = options;
+               for (;;) {
+                       /*
+                        * NUL-terminate this option: unfortunately,
+                        * mount options form a comma-separated list,
+                        * but mpol's nodelist may also contain commas.
+                        */
+                       options = strchr(options, ',');
+                       if (options == NULL)
+                               break;
+                       options++;
+                       if (!isdigit(*options)) {
+                               options[-1] = '\0';
+                               break;
+                       }
+               }
                if (!*this_char)
                        continue;
                if ((value = strchr(this_char,'=')) != NULL) {
@@ -1897,6 +2028,9 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid,
                        *gid = simple_strtoul(value,&rest,0);
                        if (*rest)
                                goto bad_val;
+               } else if (!strcmp(this_char,"mpol")) {
+                       if (shmem_parse_mpol(value,policy,policy_nodes))
+                               goto bad_val;
                } else {
                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
                               this_char);
@@ -1917,12 +2051,14 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
        unsigned long max_blocks = sbinfo->max_blocks;
        unsigned long max_inodes = sbinfo->max_inodes;
+       int policy = sbinfo->policy;
+       nodemask_t policy_nodes = sbinfo->policy_nodes;
        unsigned long blocks;
        unsigned long inodes;
        int error = -EINVAL;
 
-       if (shmem_parse_options(data, NULL, NULL, NULL,
-                               &max_blocks, &max_inodes))
+       if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks,
+                               &max_inodes, &policy, &policy_nodes))
                return error;
 
        spin_lock(&sbinfo->stat_lock);
@@ -1948,6 +2084,8 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
        sbinfo->free_blocks = max_blocks - blocks;
        sbinfo->max_inodes  = max_inodes;
        sbinfo->free_inodes = max_inodes - inodes;
+       sbinfo->policy = policy;
+       sbinfo->policy_nodes = policy_nodes;
 out:
        spin_unlock(&sbinfo->stat_lock);
        return error;
@@ -1972,6 +2110,8 @@ static int shmem_fill_super(struct super_block *sb,
        struct shmem_sb_info *sbinfo;
        unsigned long blocks = 0;
        unsigned long inodes = 0;
+       int policy = MPOL_DEFAULT;
+       nodemask_t policy_nodes = node_online_map;
 
 #ifdef CONFIG_TMPFS
        /*
@@ -1984,8 +2124,8 @@ static int shmem_fill_super(struct super_block *sb,
                inodes = totalram_pages - totalhigh_pages;
                if (inodes > blocks)
                        inodes = blocks;
-               if (shmem_parse_options(data, &mode, &uid, &gid,
-                                       &blocks, &inodes))
+               if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
+                                       &inodes, &policy, &policy_nodes))
                        return -EINVAL;
        }
 #else
@@ -2003,6 +2143,8 @@ static int shmem_fill_super(struct super_block *sb,
        sbinfo->free_blocks = blocks;
        sbinfo->max_inodes = inodes;
        sbinfo->free_inodes = inodes;
+       sbinfo->policy = policy;
+       sbinfo->policy_nodes = policy_nodes;
 
        sb->s_fs_info = sbinfo;
        sb->s_maxbytes = SHMEM_MAX_BYTES;
@@ -2010,6 +2152,11 @@ static int shmem_fill_super(struct super_block *sb,
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
        sb->s_magic = TMPFS_MAGIC;
        sb->s_op = &shmem_ops;
+       sb->s_time_gran = 1;
+#ifdef CONFIG_TMPFS_POSIX_ACL
+       sb->s_xattr = shmem_xattr_handlers;
+       sb->s_flags |= MS_POSIXACL;
+#endif
 
        inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
        if (!inode)
@@ -2029,7 +2176,7 @@ failed:
        return err;
 }
 
-static kmem_cache_t *shmem_inode_cachep;
+static struct kmem_cache *shmem_inode_cachep;
 
 static struct inode *shmem_alloc_inode(struct super_block *sb)
 {
@@ -2046,16 +2193,22 @@ static void shmem_destroy_inode(struct inode *inode)
                /* only struct inode is valid if it's an inline symlink */
                mpol_free_shared_policy(&SHMEM_I(inode)->policy);
        }
+       shmem_acl_destroy_inode(inode);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep,
+                     unsigned long flags)
 {
        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR) {
                inode_init_once(&p->vfs_inode);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+               p->i_acl = NULL;
+               p->i_default_acl = NULL;
+#endif
        }
 }
 
@@ -2071,17 +2224,17 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
-       if (kmem_cache_destroy(shmem_inode_cachep))
-               printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
+       kmem_cache_destroy(shmem_inode_cachep);
 }
 
-static struct address_space_operations shmem_aops = {
+static const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_nobuffers,
 #ifdef CONFIG_TMPFS
        .prepare_write  = shmem_prepare_write,
        .commit_write   = simple_commit_write,
 #endif
+       .migratepage    = migrate_page,
 };
 
 static struct file_operations shmem_file_operations = {
@@ -2099,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = {
        .truncate       = shmem_truncate,
        .setattr        = shmem_notify_change,
        .truncate_range = shmem_truncate_range,
+#ifdef CONFIG_TMPFS_POSIX_ACL
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = generic_listxattr,
+       .removexattr    = generic_removexattr,
+       .permission     = shmem_permission,
+#endif
+
 };
 
 static struct inode_operations shmem_dir_inode_operations = {
@@ -2113,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = {
        .mknod          = shmem_mknod,
        .rename         = shmem_rename,
 #endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
+       .setattr        = shmem_notify_change,
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = generic_listxattr,
+       .removexattr    = generic_removexattr,
+       .permission     = shmem_permission,
+#endif
+};
+
+static struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
+       .setattr        = shmem_notify_change,
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = generic_listxattr,
+       .removexattr    = generic_removexattr,
+       .permission     = shmem_permission,
+#endif
 };
 
 static struct super_operations shmem_ops = {
@@ -2137,10 +2317,10 @@ static struct vm_operations_struct shmem_vm_ops = {
 };
 
 
-static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data)
+static int shmem_get_sb(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
-       return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
+       return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
 }
 
 static struct file_system_type tmpfs_fs_type = {
@@ -2164,10 +2344,8 @@ static int __init init_tmpfs(void)
                printk(KERN_ERR "Could not register tmpfs\n");
                goto out2;
        }
-#ifdef CONFIG_TMPFS
-       devfs_mk_dir("shm");
-#endif
-       shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
+
+       shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
                                tmpfs_fs_type.name, NULL);
        if (IS_ERR(shm_mnt)) {
                error = PTR_ERR(shm_mnt);