*/
#include <linux/slab.h>
+#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+
+#include "internal.h"
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags) (0)
#endif
+#ifndef arch_rebalance_pgtables
+#define arch_rebalance_pgtables(addr, len) (addr)
+#endif
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
- return protection_map[vm_flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ return __pgprot(pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+ pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
/*
* Check that a process has enough memory to allocate a new virtual
* Note this is a helper function intended to be used by LSMs which
* wish to use this logic.
*/
-int __vm_enough_memory(long pages, int cap_sys_admin)
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
unsigned long free, allowed;
/* Don't let a single process grow too big:
leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
+ allowed -= mm->total_vm / 32;
/*
* cast `allowed' as a signed long because vm_committed_space
* sometimes has a negative value
*/
- if (atomic_read(&vm_committed_space) < (long)allowed)
+ if (atomic_long_read(&vm_committed_space) < (long)allowed)
return 0;
error:
vm_unacct_memory(pages);
return -ENOMEM;
}
-EXPORT_SYMBOL(__vm_enough_memory);
-
/*
* Requires inode->i_mapping->i_mmap_lock
*/
might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
- if (vma->vm_file)
+ if (vma->vm_file) {
fput(vma->vm_file);
- mpol_free(vma_policy(vma));
+ if (vma->vm_flags & VM_EXECUTABLE)
+ removed_exe_file_vma(vma->vm_mm);
+ }
+ mpol_put(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma);
return next;
}
unsigned long rlim, retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
+ unsigned long min_brk;
down_write(&mm->mmap_sem);
- if (brk < mm->end_code)
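+ /*
+ * With CONFIG_COMPAT_BRK the heap still starts right after the code
+ * segment (legacy layout), so that is the lower bound; otherwise brk
+ * may not shrink below the (possibly randomized) start_brk.
+ */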
+#ifdef CONFIG_COMPAT_BRK
+ min_brk = mm->end_code;
+#else
+ min_brk = mm->start_brk;
+#endif
+ if (brk < min_brk)
goto out;
/*
* not page aligned -Ram Gupta
*/
rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
+ (mm->end_data - mm->start_data) > rlim)
goto out;
newbrk = PAGE_ALIGN(brk);
printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
i++;
pn = nd;
+ prev = vma->vm_start;
+ pend = vma->vm_end;
}
j = 0;
for (nd = pn; nd; nd = rb_prev(nd)) {
if (vma_tmp->vm_end > addr) {
vma = vma_tmp;
if (vma_tmp->vm_start <= addr)
- return vma;
+ break;
__rb_link = &__rb_parent->rb_left;
} else {
rb_prev = __rb_parent;
rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}
-static inline void __vma_link_file(struct vm_area_struct *vma)
+static void __vma_link_file(struct vm_area_struct *vma)
{
struct file * file;
spin_unlock(&mapping->i_mmap_lock);
if (remove_next) {
- if (file)
+ if (file) {
fput(file);
+ if (next->vm_flags & VM_EXECUTABLE)
+ removed_exe_file_vma(mm);
+ }
mm->map_count--;
- mpol_free(vma_policy(next));
+ mpol_put(vma_policy(next));
kmem_cache_free(vm_area_cachep, next);
/*
* In mprotect's case 6 (see comments on vma_merge),
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
-
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
{
unsigned long flags, unsigned long pgoff)
{
struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma, * prev;
struct inode *inode;
unsigned int vm_flags;
- int correct_wcount = 0;
int error;
- struct rb_node ** rb_link, * rb_parent;
int accountable = 1;
- unsigned long charged = 0, reqprot = prot;
+ unsigned long reqprot = prot;
/*
* Does the application expect PROT_READ to imply PROT_EXEC?
if (!len)
return -EINVAL;
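+ /* Hints below the minimum mmap address are rounded up to it */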
+ if (!(flags & MAP_FIXED))
+ addr = round_hint_to_min(addr);
+
error = arch_mmap_check(addr, len, flags);
if (error)
return error;
return -EPERM;
vm_flags |= VM_LOCKED;
}
+
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
} else {
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ /*
+ * Ignore pgoff.
+ */
+ pgoff = 0;
vm_flags |= VM_SHARED | VM_MAYSHARE;
break;
case MAP_PRIVATE:
}
}
- error = security_file_mmap(file, reqprot, prot, flags);
+ error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
if (error)
return error;
-
+
+ return mmap_region(file, addr, len, flags, vm_flags, pgoff,
+ accountable);
+}
+EXPORT_SYMBOL(do_mmap_pgoff);
+
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+ unsigned int vm_flags = vma->vm_flags;
+
+ /* If it was private or non-writable, the write bit is already clear */
+ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+ return 0;
+
+ /* The backer wishes to know when pages are first written to? */
+ if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ return 1;
+
+ /* The open routine did something to the protections already? */
+ if (pgprot_val(vma->vm_page_prot) !=
+ pgprot_val(vm_get_page_prot(vm_flags)))
+ return 0;
+
+ /* Specialty mapping? */
+ if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+ return 0;
+
+ /* Can the mapping track the dirty pages? */
+ return vma->vm_file && vma->vm_file->f_mapping &&
+ mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
+unsigned long mmap_region(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long flags,
+ unsigned int vm_flags, unsigned long pgoff,
+ int accountable)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma, *prev;
+ int correct_wcount = 0;
+ int error;
+ struct rb_node **rb_link, *rb_parent;
+ unsigned long charged = 0;
+ struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
+
/* Clear old maps */
error = -ENOMEM;
munmap_back:
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
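+ /* Propagate the caller's MAP_NORESERVE request into the vma flags */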
+ if (flags & MAP_NORESERVE)
+ vm_flags |= VM_NORESERVE;
+
if (accountable && (!(flags & MAP_NORESERVE) ||
sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
if (vm_flags & VM_SHARED) {
* The VM_SHARED test is necessary because shmem_zero_setup
* will create the file object for a shared anonymous map below.
*/
- if (!file && !(vm_flags & VM_SHARED) &&
- vma_merge(mm, prev, addr, addr + len, vm_flags,
- NULL, NULL, pgoff, NULL))
- goto out;
+ if (!file && !(vm_flags & VM_SHARED)) {
+ vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+ NULL, NULL, pgoff, NULL);
+ if (vma)
+ goto out;
+ }
/*
* Determine the object being mapped and call the appropriate
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags;
- vma->vm_page_prot = protection_map[vm_flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
if (file) {
error = file->f_op->mmap(file, vma);
if (error)
goto unmap_and_free_vma;
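+ /* Track executable file mappings; this backs /proc/<pid>/exe */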
+ if (vm_flags & VM_EXECUTABLE)
+ added_exe_file_vma(mm);
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
vm_flags = vma->vm_flags;
if (vma_wants_writenotify(vma))
- vma->vm_page_prot =
- protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+ vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
- if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
+ if (file && vma_merge(mm, prev, addr, vma->vm_end,
vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
- file = vma->vm_file;
- vma_link(mm, vma, prev, rb_link, rb_parent);
- if (correct_wcount)
- atomic_inc(&inode->i_writecount);
- } else {
- if (file) {
- if (correct_wcount)
- atomic_inc(&inode->i_writecount);
- fput(file);
- }
- mpol_free(vma_policy(vma));
+ mpol_put(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma);
+ fput(file);
+ if (vm_flags & VM_EXECUTABLE)
+ removed_exe_file_vma(mm);
+ } else {
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+ file = vma->vm_file;
}
-out:
+
+ /* Once vma denies write, undo our temporary denial count */
+ if (correct_wcount)
+ atomic_inc(&inode->i_writecount);
+out:
mm->total_vm += len >> PAGE_SHIFT;
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
+ /*
+ * makes pages present; downgrades, drops, reacquires mmap_sem
+ */
+ long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
+ if (nr_pages < 0)
+ return nr_pages; /* vma gone! */
+ mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+ } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
- }
- if (flags & MAP_POPULATE) {
- up_write(&mm->mmap_sem);
- sys_remap_file_pages(addr, len, 0,
- pgoff, flags & MAP_NONBLOCK);
- down_write(&mm->mmap_sem);
- }
return addr;
unmap_and_free_vma:
return error;
}
-EXPORT_SYMBOL(do_mmap_pgoff);
-
/* Get an address range which is currently unmapped.
* For shmat() with addr=0.
*
if (len > TASK_SIZE)
return -ENOMEM;
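+ /* With MAP_FIXED the caller insists on exactly this address */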
+ if (flags & MAP_FIXED)
+ return addr;
+
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (len > TASK_SIZE)
return -ENOMEM;
+ if (flags & MAP_FIXED)
+ return addr;
+
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- unsigned long ret;
-
- if (!(flags & MAP_FIXED)) {
- unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
-
- get_area = current->mm->get_unmapped_area;
- if (file && file->f_op && file->f_op->get_unmapped_area)
- get_area = file->f_op->get_unmapped_area;
- addr = get_area(file, addr, len, pgoff, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
- }
+ unsigned long (*get_area)(struct file *, unsigned long,
+ unsigned long, unsigned long, unsigned long);
+
+ get_area = current->mm->get_unmapped_area;
+ if (file && file->f_op && file->f_op->get_unmapped_area)
+ get_area = file->f_op->get_unmapped_area;
+ addr = get_area(file, addr, len, pgoff, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
if (addr > TASK_SIZE - len)
return -ENOMEM;
if (addr & ~PAGE_MASK)
return -EINVAL;
- if (file && is_file_hugepages(file)) {
- /*
- * Check if the given range is hugepage aligned, and
- * can be made suitable for hugepages.
- */
- ret = prepare_hugepage_range(addr, len, pgoff);
- } else {
- /*
- * Ensure that a normal request is not falling in a
- * reserved hugepage range. For some archs like IA-64,
- * there is a separate region for hugepages.
- */
- ret = is_hugepage_only_range(current->mm, addr, len);
- }
- if (ret)
- return -EINVAL;
- return addr;
+
+ return arch_rebalance_pgtables(addr, len);
}
EXPORT_SYMBOL(get_unmapped_area);
* vma is the last one with address > vma->vm_end. Have to extend vma.
*/
#ifndef CONFIG_IA64
-static inline
+static
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
+ * Also guard against wrapping around to address 0.
*/
- address += 4 + PAGE_SIZE - 1;
- address &= PAGE_MASK;
+ if (address < PAGE_ALIGN(address+4))
+ address = PAGE_ALIGN(address+4);
+ else {
+ anon_vma_unlock(vma);
+ return -ENOMEM;
+ }
error = 0;
/* Somebody else might have raced and expanded it already */
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
-#ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
-{
- return expand_upwards(vma, address);
-}
-
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
- struct vm_area_struct *vma, *prev;
-
- addr &= PAGE_MASK;
- vma = find_vma_prev(mm, addr, &prev);
- if (vma && (vma->vm_start <= addr))
- return vma;
- if (!prev || expand_stack(prev, addr))
- return NULL;
- if (prev->vm_flags & VM_LOCKED) {
- make_pages_present(addr, prev->vm_end);
- }
- return prev;
-}
-#else
/*
* vma is the first one with address < vma->vm_start. Have to extend vma.
*/
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+static int expand_downwards(struct vm_area_struct *vma,
+ unsigned long address)
{
int error;
*/
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
+
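+ /* Check the new stack bottom against the security policy (e.g. mmap_min_addr) */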
+ address &= PAGE_MASK;
+ error = security_file_mmap(NULL, 0, 0, 0, address, 1);
+ if (error)
+ return error;
+
anon_vma_lock(vma);
/*
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
- address &= PAGE_MASK;
- error = 0;
/* Somebody else might have raced and expanded it already */
if (address < vma->vm_start) {
return error;
}
+int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_downwards(vma, address);
+}
+
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_upwards(vma, address);
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma, *prev;
+
+ addr &= PAGE_MASK;
+ vma = find_vma_prev(mm, addr, &prev);
+ if (vma && (vma->vm_start <= addr))
+ return vma;
+ if (expand_stack(prev, addr))
+ return NULL;
+ if (prev->vm_flags & VM_LOCKED) {
+ if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
+ return NULL; /* vma gone! */
+ }
+ return prev;
+}
+#else
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_downwards(vma, address);
+}
+
struct vm_area_struct *
find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
if (expand_stack(vma, addr))
return NULL;
if (vma->vm_flags & VM_LOCKED) {
- make_pages_present(addr, start);
+ if (mlock_vma_pages_range(vma, addr, start) < 0)
+ return NULL; /* vma gone! */
}
return vma;
}
long nrpages = vma_pages(vma);
mm->total_vm -= nrpages;
- if (vma->vm_flags & VM_LOCKED)
- mm->locked_vm -= nrpages;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
vma = remove_vma(vma);
} while (vma);
update_hiwater_rss(mm);
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+ free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
}
/*
* Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the the tail.
+ * either for the first part or the tail.
*/
int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below)
struct mempolicy *pol;
struct vm_area_struct *new;
- if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+ if (is_vm_hugetlb_page(vma) && (addr &
+ ~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
if (mm->map_count >= sysctl_max_map_count)
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
}
- pol = mpol_copy(vma_policy(vma));
+ pol = mpol_dup(vma_policy(vma));
if (IS_ERR(pol)) {
kmem_cache_free(vm_area_cachep, new);
return PTR_ERR(pol);
}
vma_set_policy(new, pol);
- if (new->vm_file)
+ if (new->vm_file) {
get_file(new->vm_file);
+ if (vma->vm_flags & VM_EXECUTABLE)
+ added_exe_file_vma(mm);
+ }
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
vma = prev? prev->vm_next: mm->mmap;
/*
+ * unlock any mlock()ed ranges before detaching vmas
+ */
+ if (mm->locked_vm) {
+ struct vm_area_struct *tmp = vma;
+ while (tmp && tmp->vm_start < end) {
+ if (tmp->vm_flags & VM_LOCKED) {
+ mm->locked_vm -= vma_pages(tmp);
+ munlock_vma_pages_all(tmp);
+ }
+ tmp = tmp->vm_next;
+ }
+ }
+
+ /*
* Remove the vma's, and unmap the actual pages
*/
detach_vmas_to_be_unmapped(mm, vma, prev, end);
if (is_hugepage_only_range(mm, addr, len))
return -EINVAL;
+ error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
+ if (error)
+ return error;
+
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
error = arch_mmap_check(addr, len, flags);
return -ENOMEM;
/* Can we just expand an old private anonymous mapping? */
- if (vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL))
+ vma = vma_merge(mm, prev, addr, addr + len, flags,
+ NULL, NULL, pgoff, NULL);
+ if (vma)
goto out;
/*
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vma->vm_flags = flags;
- vma->vm_page_prot = protection_map[flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ vma->vm_page_prot = vm_get_page_prot(flags);
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
mm->total_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
- make_pages_present(addr, addr + len);
+ if (!mlock_vma_pages_range(vma, addr, addr + len))
+ mm->locked_vm += (len >> PAGE_SHIFT);
}
return addr;
}
void exit_mmap(struct mm_struct *mm)
{
struct mmu_gather *tlb;
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
unsigned long end;
+ /* mm's last user has gone, and it's about to be pulled down */
+ arch_exit_mmap(mm);
+ mmu_notifier_release(mm);
+
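+ /* Undo any mlock()ed regions before the final unmap below */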
+ if (mm->locked_vm) {
+ vma = mm->mmap;
+ while (vma) {
+ if (vma->vm_flags & VM_LOCKED)
+ munlock_vma_pages_all(vma);
+ vma = vma->vm_next;
+ }
+ }
+ vma = mm->mmap;
lru_add_drain();
flush_cache_mm(mm);
tlb = tlb_gather_mmu(mm, 1);
/* Use -1 here to ensure all VMAs in the mm are unmapped */
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
tlb_finish_mmu(tlb, 0, end);
/*
if (__vma && __vma->vm_start < vma->vm_end)
return -ENOMEM;
if ((vma->vm_flags & VM_ACCOUNT) &&
- security_vm_enough_memory(vma_pages(vma)))
+ security_vm_enough_memory_mm(mm, vma_pages(vma)))
return -ENOMEM;
vma_link(mm, vma, prev, rb_link, rb_parent);
return 0;
new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (new_vma) {
*new_vma = *vma;
- pol = mpol_copy(vma_policy(vma));
+ pol = mpol_dup(vma_policy(vma));
if (IS_ERR(pol)) {
kmem_cache_free(vm_area_cachep, new_vma);
return NULL;
new_vma->vm_start = addr;
new_vma->vm_end = addr + len;
new_vma->vm_pgoff = pgoff;
- if (new_vma->vm_file)
+ if (new_vma->vm_file) {
get_file(new_vma->vm_file);
+ if (vma->vm_flags & VM_EXECUTABLE)
+ added_exe_file_vma(mm);
+ }
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
vma_link(mm, new_vma, prev, rb_link, rb_parent);
return 0;
return 1;
}
+
+static int special_mapping_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ pgoff_t pgoff;
+ struct page **pages;
+
+ /*
+ * special mappings have no vm_file, and in that case, the mm
+ * uses vm_pgoff internally. So we have to subtract it from here.
+ * We are allowed to do this because we are the mm; do not copy
+ * this code into drivers!
+ */
+ pgoff = vmf->pgoff - vma->vm_pgoff;
+
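+ /* Walk the NULL-terminated page array to the faulting offset */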
+ for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+ pgoff--;
+
+ if (*pages) {
+ struct page *page = *pages;
+ get_page(page);
+ vmf->page = page;
+ return 0;
+ }
+
+ return VM_FAULT_SIGBUS;
+}
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+};
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, struct page **pages)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ if (unlikely(vma == NULL))
+ return -ENOMEM;
+
+ vma->vm_mm = mm;
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+
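+ /* VM_DONTEXPAND keeps mremap() from growing this mapping */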
+ vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+
+ vma->vm_ops = &special_mapping_vmops;
+ vma->vm_private_data = pages;
+
+ if (unlikely(insert_vm_struct(mm, vma))) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+
+ mm->total_vm += len >> PAGE_SHIFT;
+
+ return 0;
+}
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+ if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ /*
+ * The LSB of head.next can't change from under us
+ * because we hold the mm_all_locks_mutex.
+ */
+ spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+ /*
+ * We can safely modify head.next after taking the
+ * anon_vma->lock. If some other vma in this mm shares
+ * the same anon_vma we won't take it again.
+ *
+ * No need of atomic instructions here, head.next
+ * can't change from under us thanks to the
+ * anon_vma->lock.
+ */
+ if (__test_and_set_bit(0, (unsigned long *)
+ &anon_vma->head.next))
+ BUG();
+ }
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+ if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+ /*
+ * AS_MM_ALL_LOCKS can't change from under us because
+ * we hold the mm_all_locks_mutex.
+ *
+ * Operations on ->flags have to be atomic because
+ * even if AS_MM_ALL_LOCKS is stable thanks to the
+ * mm_all_locks_mutex, there may be other cpus
+ * changing other bitflags in parallel to us.
+ */
+ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+ BUG();
+ spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+ }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to prevent new
+ * anon_vmas from being associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid taking the same lock twice when more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
+ * that may have to take thousands of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ int ret = -EINTR;
+
+ BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+ mutex_lock(&mm_all_locks_mutex);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (signal_pending(current))
+ goto out_unlock;
+ if (vma->vm_file && vma->vm_file->f_mapping)
+ vm_lock_mapping(mm, vma->vm_file->f_mapping);
+ }
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (signal_pending(current))
+ goto out_unlock;
+ if (vma->anon_vma)
+ vm_lock_anon_vma(mm, vma->anon_vma);
+ }
+
+ ret = 0;
+
+out_unlock:
+ if (ret)
+ mm_drop_all_locks(mm);
+
+ return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+ if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ /*
+ * The LSB of head.next can't change to 0 from under
+ * us because we hold the mm_all_locks_mutex.
+ *
+ * We must however clear the bitflag before unlocking
+ * the vma so the users using the anon_vma->head will
+ * never see our bitflag.
+ *
+ * No need of atomic instructions here, head.next
+ * can't change from under us until we release the
+ * anon_vma->lock.
+ */
+ if (!__test_and_clear_bit(0, (unsigned long *)
+ &anon_vma->head.next))
+ BUG();
+ spin_unlock(&anon_vma->lock);
+ }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+ if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+ /*
+ * AS_MM_ALL_LOCKS can't change to 0 from under us
+ * because we hold the mm_all_locks_mutex.
+ */
+ spin_unlock(&mapping->i_mmap_lock);
+ if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+ &mapping->flags))
+ BUG();
+ }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (vma->anon_vma)
+ vm_unlock_anon_vma(vma->anon_vma);
+ if (vma->vm_file && vma->vm_file->f_mapping)
+ vm_unlock_mapping(vma->vm_file->f_mapping);
+ }
+
+ mutex_unlock(&mm_all_locks_mutex);
+}