mm: merge populate and nopage into fault (fixes nonlinear)
diff --git a/mm/shmem.c b/mm/shmem.c
index e537317..6b44440 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -82,6 +83,7 @@ enum sgp_type {
        SGP_READ,       /* don't exceed i_size, don't allocate page */
        SGP_CACHE,      /* don't exceed i_size, may allocate page */
        SGP_WRITE,      /* may exceed i_size, may allocate page */
+       SGP_FAULT,      /* same as SGP_CACHE, return with page locked */
 };
 
 static int shmem_getpage(struct inode *inode, unsigned long idx,
@@ -93,8 +95,11 @@ static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
         * The above definition of ENTRIES_PER_PAGE, and the use of
         * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
         * might be reconsidered if it ever diverges from PAGE_SIZE.
+        *
+        * __GFP_MOVABLE is masked out as swap vectors cannot move
         */
-       return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+       return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+                               PAGE_CACHE_SHIFT-PAGE_SHIFT);
 }
 
 static inline void shmem_dir_free(struct page *page)
@@ -372,7 +377,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
                }
 
                spin_unlock(&info->lock);
-               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
                if (page)
                        set_page_private(page, 0);
                spin_lock(&info->lock);
@@ -967,6 +972,8 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
                *nodelist++ = '\0';
                if (nodelist_parse(nodelist, *policy_nodes))
                        goto out;
+               if (!nodes_subset(*policy_nodes, node_online_map))
+                       goto out;
        }
        if (!strcmp(value, "default")) {
                *policy = MPOL_DEFAULT;
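
The new nodes_subset() check turns a nodelist naming offline nodes into a
mount-time failure instead of a silently accepted option. A minimal userspace
sketch of the visible effect (hypothetical: assumes root, a CONFIG_NUMA
kernel, and that node 63 is offline; the mount point is illustrative):

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/mount.h>

        int main(void)
        {
                /* node 63 assumed offline, so the nodelist is invalid */
                if (mount("tmpfs", "/mnt", "tmpfs", 0, "mpol=bind:63") == 0) {
                        puts("mount unexpectedly succeeded");
                        umount("/mnt");
                } else {
                        printf("mount rejected: %s\n", strerror(errno));
                }
                return 0;
        }
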
@@ -1094,13 +1101,17 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 
        if (idx >= SHMEM_MAX_INDEX)
                return -EFBIG;
+
+       if (type)
+               *type = VM_FAULT_MINOR;
+
        /*
         * Normally, filepage is NULL on entry, and either found
         * uptodate immediately, or allocated and zeroed, or read
         * in under swappage, which is then assigned to filepage.
-        * But shmem_prepare_write passes in a locked filepage,
-        * which may be found not uptodate by other callers too,
-        * and may need to be copied from the swappage read in.
+        * But shmem_readpage and shmem_prepare_write pass in a locked
+        * filepage, which may be found not uptodate by other callers
+        * too, and may need to be copied from the swappage read in.
         */
 repeat:
        if (!filepage)
@@ -1283,8 +1294,9 @@ repeat:
        }
 done:
        if (*pagep != filepage) {
-               unlock_page(filepage);
                *pagep = filepage;
+               if (sgp != SGP_FAULT)
+                       unlock_page(filepage);
        }
        return 0;
 
@@ -1296,74 +1309,31 @@ failed:
        return error;
 }
 
-static struct page *shmem_nopage(struct vm_area_struct *vma,
-                                unsigned long address, int *type)
+static struct page *shmem_fault(struct vm_area_struct *vma,
+                                       struct fault_data *fdata)
 {
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        struct page *page = NULL;
-       unsigned long idx;
        int error;
 
-       idx = (address - vma->vm_start) >> PAGE_SHIFT;
-       idx += vma->vm_pgoff;
-       idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-       if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-               return NOPAGE_SIGBUS;
+       BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
 
-       error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
-       if (error)
-               return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
+       if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+               fdata->type = VM_FAULT_SIGBUS;
+               return NULL;
+       }
+
+       error = shmem_getpage(inode, fdata->pgoff, &page,
+                                               SGP_FAULT, &fdata->type);
+       if (error) {
+               fdata->type = (error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+               return NULL;
+       }
 
        mark_page_accessed(page);
        return page;
 }
 
-static int shmem_populate(struct vm_area_struct *vma,
-       unsigned long addr, unsigned long len,
-       pgprot_t prot, unsigned long pgoff, int nonblock)
-{
-       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-       struct mm_struct *mm = vma->vm_mm;
-       enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
-       unsigned long size;
-
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
-               return -EINVAL;
-
-       while ((long) len > 0) {
-               struct page *page = NULL;
-               int err;
-               /*
-                * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
-                */
-               err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
-               if (err)
-                       return err;
-               /* Page may still be null, but only if nonblock was set. */
-               if (page) {
-                       mark_page_accessed(page);
-                       err = install_page(mm, vma, addr, page, prot);
-                       if (err) {
-                               page_cache_release(page);
-                               return err;
-                       }
-               } else if (vma->vm_flags & VM_NONLINEAR) {
-                       /* No page was found just because we can't read it in
-                        * now (being here implies nonblock != 0), but the page
-                        * may exist, so set the PTE to fault it in later. */
-                       err = install_file_pte(mm, vma, addr, pgoff, prot);
-                       if (err)
-                               return err;
-               }
-
-               len -= PAGE_SIZE;
-               addr += PAGE_SIZE;
-               pgoff++;
-       }
-       return 0;
-}
-
 #ifdef CONFIG_NUMA
 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
 {
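
Where shmem_nopage computed an index from the faulting virtual address,
->fault is handed the pgoff directly in fdata, so the same handler serves
linear and nonlinear ptes, and errors travel back through fdata->type. A
runnable userspace sketch of the bounds check above (the file name is
illustrative and /dev/shm is assumed to be tmpfs; error handling elided):

        #include <fcntl.h>
        #include <setjmp.h>
        #include <signal.h>
        #include <stdio.h>
        #include <sys/mman.h>
        #include <unistd.h>

        static sigjmp_buf env;

        static void on_sigbus(int sig)
        {
                (void)sig;
                siglongjmp(env, 1);
        }

        int main(void)
        {
                long psz = sysconf(_SC_PAGESIZE);
                int fd = open("/dev/shm/sigbus-demo",
                              O_RDWR | O_CREAT | O_TRUNC, 0600);
                char *map;

                ftruncate(fd, 1);       /* i_size is one byte */
                map = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
                signal(SIGBUS, on_sigbus);

                if (sigsetjmp(env, 1) == 0) {
                        map[0] = 'x';   /* page 0: inside i_size, faults in */
                        map[psz] = 'y'; /* page 1: wholly past i_size */
                        puts("no SIGBUS?");
                } else {
                        puts("SIGBUS past EOF, as shmem_fault intends");
                }
                return 0;
        }
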
@@ -1408,6 +1378,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
+       vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
        return 0;
 }
 
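VM_CAN_NONLINEAR is what advertises the merged fault path to the nonlinear
machinery, the case the patch title says is fixed: each file pte carries its
own pgoff, and ->fault resolves whatever pgoff it is given. A sketch using
remap_file_pages(), the nonlinear API of this era (file name illustrative;
later kernels merely emulate the call):

        #define _GNU_SOURCE
        #include <assert.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/mman.h>
        #include <unistd.h>

        int main(void)
        {
                long psz = sysconf(_SC_PAGESIZE);
                int fd = open("/dev/shm/nonlinear-demo",
                              O_RDWR | O_CREAT | O_TRUNC, 0600);
                char *map;

                assert(fd >= 0 && ftruncate(fd, 2 * psz) == 0);
                map = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
                assert(map != MAP_FAILED);
                strcpy(map, "page0");
                strcpy(map + psz, "page1");

                /* swap the two windows: the vma goes VM_NONLINEAR, and
                 * every fault must be resolved by pgoff, not by the
                 * virtual offset shmem_nopage used to recompute */
                assert(remap_file_pages(map, psz, 0, 1, 0) == 0);
                assert(remap_file_pages(map + psz, psz, 0, 0, 0) == 0);

                printf("%s %s\n", map, map + psz);  /* page1 page0 */
                return 0;
        }
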
@@ -1483,9 +1454,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * Normally tmpfs makes no use of shmem_prepare_write, but it
- * lets a tmpfs file be used read-write below the loop driver.
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * but providing them allows a tmpfs file to be used for splice, sendfile, and
+ * below the loop driver, in the generic fashion that many filesystems support.
  */
+static int shmem_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
+       unlock_page(page);
+       return error;
+}
+
 static int
 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 {
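
With ->readpage present, any generic reader of the page cache can drive
tmpfs. The loop driver is the classic case the comment mentions; a userspace
sketch (hypothetical paths, needs root and a free /dev/loop0):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <linux/loop.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/dev/shm/loop-backing",
                              O_RDWR | O_CREAT | O_TRUNC, 0600);
                int loopfd = open("/dev/loop0", O_RDWR);
                char buf[512];

                ftruncate(fd, 1 << 20); /* 1 MiB backing file on tmpfs */
                if (ioctl(loopfd, LOOP_SET_FD, fd) != 0) {
                        perror("LOOP_SET_FD");
                        return 1;
                }
                /* reads on the device fill pages via shmem_readpage */
                pread(loopfd, buf, sizeof(buf), 0);
                ioctl(loopfd, LOOP_CLR_FD, 0);
                return 0;
        }
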
@@ -1709,25 +1689,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
        return desc.error;
 }
 
-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_shmem_file_read(in_file, ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2384,6 +2345,7 @@ static const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
+       .readpage       = shmem_readpage,
        .prepare_write  = shmem_prepare_write,
        .commit_write   = simple_commit_write,
 #endif
@@ -2397,7 +2359,8 @@ static const struct file_operations shmem_file_operations = {
        .read           = shmem_file_read,
        .write          = shmem_file_write,
        .fsync          = simple_sync_file,
-       .sendfile       = shmem_file_sendfile,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = generic_file_splice_write,
 #endif
 };
 
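The filesystem-specific sendfile hook gives way to the generic splice pair.
A short sketch of data leaving tmpfs through splice(2) (file name
illustrative; error handling elided):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/types.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/dev/shm/splice-demo",
                              O_RDWR | O_CREAT | O_TRUNC, 0600);
                int pipefd[2];
                char buf[32] = "";
                loff_t off = 0;

                write(fd, "hello from tmpfs", 16);
                pipe(pipefd);

                /* file to pipe: this runs generic_file_splice_read */
                splice(fd, &off, pipefd[1], NULL, 16, 0);
                read(pipefd[0], buf, sizeof(buf) - 1);
                printf("%s\n", buf);
                return 0;
        }
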
@@ -2461,8 +2424,7 @@ static const struct super_operations shmem_ops = {
 };
 
 static struct vm_operations_struct shmem_vm_ops = {
-       .nopage         = shmem_nopage,
-       .populate       = shmem_populate,
+       .fault          = shmem_fault,
 #ifdef CONFIG_NUMA
        .set_policy     = shmem_set_policy,
        .get_policy     = shmem_get_policy,
@@ -2598,5 +2560,6 @@ int shmem_zero_setup(struct vm_area_struct *vma)
                fput(vma->vm_file);
        vma->vm_file = file;
        vma->vm_ops = &shmem_vm_ops;
+       vma->vm_flags |= VM_CAN_INVALIDATE;
        return 0;
 }
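
shmem_zero_setup() backs MAP_SHARED|MAP_ANONYMOUS mappings with an unlinked
tmpfs file, so they too now fault through the invalidate-aware path. A tiny
sketch of such a mapping shared across fork():

        #define _GNU_SOURCE
        #include <stdio.h>
        #include <sys/mman.h>
        #include <sys/wait.h>
        #include <unistd.h>

        int main(void)
        {
                int *p = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
                              MAP_SHARED | MAP_ANONYMOUS, -1, 0);

                if (fork() == 0) {      /* child faults its own pte in */
                        *p = 42;
                        _exit(0);
                }
                wait(NULL);
                printf("parent sees %d\n", *p); /* expect 42 */
                return 0;
        }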