#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
-#include <linux/ima.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
if (!(flags & MAP_FIXED))
addr = round_hint_to_min(addr);
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
- if (!len || len > TASK_SIZE)
+ if (!len)
return -ENOMEM;
/* offset overflow? */
error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
if (error)
return error;
- error = ima_file_mmap(file, prot);
- if (error)
- return error;
return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
+{
+ struct file *file = NULL;
+ unsigned long retval = -EBADF;
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ if (unlikely(flags & MAP_HUGETLB))
+ return -EINVAL;
+ file = fget(fd);
+ if (!file)
+ goto out;
+ } else if (flags & MAP_HUGETLB) {
+ struct user_struct *user = NULL;
+ /*
+ * VM_NORESERVE is used because the reservations will be
+ * taken when vm_ops->mmap() is called.
+ * A dummy user value is used because we are not locking
+ * memory, so no accounting is necessary.
+ */
+ len = ALIGN(len, huge_page_size(&default_hstate));
+ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+ &user, HUGETLB_ANONHUGE_INODE);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ }
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+ down_write(&current->mm->mmap_sem);
+ retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ up_write(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+out:
+ return retval;
+}
+
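For context, a minimal userspace sketch (not part of the patch) that exercises the new MAP_HUGETLB path above. It assumes huge pages have been reserved (e.g. via /proc/sys/vm/nr_hugepages) and a 2 MiB huge page size, and it defines MAP_HUGETLB by hand since older libc headers may not carry it; 0x40000 is the x86 value.

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* x86 value; arch-specific */
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* one 2 MiB huge page */

	/* fd is -1: the kernel builds the anonymous hugetlb file itself. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	return munmap(p, len);
}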
/*
* Some shared mappings will want the pages marked read-only
* to track write events. If so, we'll downgrade vm_page_prot
goto unmap_and_free_vma;
if (vm_flags & VM_EXECUTABLE)
added_exe_file_vma(mm);
+
+ /* Can addr have changed??
+ *
+ * Answer: Yes, several device drivers can do it in their
+ * f_op->mmap method. -DaveM
+ */
+ addr = vma->vm_start;
+ pgoff = vma->vm_pgoff;
+ vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
}
- /* Can addr have changed??
- *
- * Answer: Yes, several device drivers can do it in their
- * f_op->mmap method. -DaveM
- */
- addr = vma->vm_start;
- pgoff = vma->vm_pgoff;
- vm_flags = vma->vm_flags;
- if (vma_wants_writenotify(vma))
+ if (vma_wants_writenotify(vma)) {
+ pgprot_t pprot = vma->vm_page_prot;
+
+ /* Can vma->vm_page_prot have changed??
+ *
+ * Answer: Yes, drivers may have changed it in their
+ * f_op->mmap method.
+ *
+ * Ensures that vmas marked as uncached stay that way.
+ */
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+ if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ }
vma_link(mm, vma, prev, rb_link, rb_parent);
file = vma->vm_file;
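To see why vm_page_prot can change under mmap_region(), consider a driver mmap method that remaps MMIO uncached. The sketch below is illustrative only (mydrv_mmap and mydev_phys are invented names); the hunk above exists so that exactly this kind of mapping stays uncached when writenotify downgrades protections.

static phys_addr_t mydev_phys;	/* hypothetical MMIO base, set at probe time */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Mark the mapping uncached before establishing the PTEs. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return remap_pfn_range(vma, vma->vm_start,
			       mydev_phys >> PAGE_SHIFT,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}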
unsigned long (*get_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
+ unsigned long error = arch_mmap_check(addr, len, flags);
+ if (error)
+ return error;
+
+ /* Careful about overflows.. */
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
get_area = current->mm->get_unmapped_area;
if (file && file->f_op && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
}
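Centralizing arch_mmap_check() and the TASK_SIZE check here means every path, including file-specific hooks, inherits the validation. A hedged sketch of such a hook (myfs_get_unmapped_area is invented; real filesystems such as hugetlbfs supply their own):

static unsigned long myfs_get_unmapped_area(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	/* Apply filesystem-specific placement, then defer to the mm default. */
	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
}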
/*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
*/
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below)
{
struct mempolicy *pol;
~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
- if (mm->map_count >= sysctl_max_map_count)
- return -ENOMEM;
-
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)
return -ENOMEM;
return 0;
}
+/*
+ * Split a vma into two pieces at address 'addr'; a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
+{
+ if (mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ return __split_vma(mm, vma, addr, new_below);
+}
+
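Callers outside the munmap path keep using the checked wrapper. A sketch of the usual pattern (clamp_vma_start is a made-up helper modeled on the mprotect/madvise callers):

static int clamp_vma_start(struct mm_struct *mm, struct vm_area_struct *vma,
			   unsigned long start)
{
	/* Splitting here can raise map_count, so the limit must be enforced. */
	if (start > vma->vm_start)
		return split_vma(mm, vma, start, 1);
	return 0;
}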
/* Munmap is split into 2 main parts -- this part which finds
* what needs doing, and the areas themselves, which do the
* work. This now handles partial unmappings.
* places tmp vma above, and higher split_vma places tmp vma below.
*/
if (start > vma->vm_start) {
- int error = split_vma(mm, vma, start, 0);
+ int error;
+
+ /*
+ * Make sure that map_count on return from munmap() will
+ * not exceed its limit; but let map_count go just above
+ * its limit temporarily, to help free resources as expected.
+ */
+ if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ error = __split_vma(mm, vma, start, 0);
if (error)
return error;
prev = vma;
/* Does it split the last one? */
last = find_vma(mm, end);
if (last && end > last->vm_start) {
- int error = split_vma(mm, last, end, 1);
+ int error = __split_vma(mm, last, end, 1);
if (error)
return error;
}
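The situation the map_count comment above guards against is a hole punched in the middle of a mapping, which needs a split at each end. A minimal userspace illustration:

#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Unmap the middle page: one vma briefly becomes three, then two. */
	return munmap(p + page, page);
}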
if (!len)
return addr;
- if ((addr + len) > TASK_SIZE || (addr + len) < addr)
- return -EINVAL;
-
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
-
error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
if (error)
return error;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
- error = arch_mmap_check(addr, len, flags);
- if (error)
+ error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+ if (error & ~PAGE_MASK)
return error;
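The test against ~PAGE_MASK works because get_unmapped_area() returns either a page-aligned address or a negative errno, and an errno value always has low bits set. A hedged illustration (gua_failed is not in the tree):

/* True when a get_unmapped_area() return value encodes an errno. */
static inline int gua_failed(unsigned long ret)
{
	return (ret & ~PAGE_MASK) != 0;
}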
/*
static void special_mapping_close(struct vm_area_struct *vma)
{
}
-static struct vm_operations_struct special_mapping_vmops = {
+static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
};