*
* (C) Copyright 1996 Linus Torvalds
*
- * Address space accounting code <alan@redhat.com>
+ * Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
+#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
+#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include "internal.h"
+
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- /*
- * We don't need page_table_lock: we have mmap_sem exclusively.
- */
pgd = pgd_offset(mm, addr);
if (pgd_none_or_clear_bad(pgd))
return NULL;
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd = NULL;
+ pmd_t *pmd;
- /*
- * We do need page_table_lock: because allocators expect that.
- */
- spin_lock(&mm->page_table_lock);
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (!pud)
- goto out;
+ return NULL;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
- goto out;
+ return NULL;
if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
- pmd = NULL;
-out:
- spin_unlock(&mm->page_table_lock);
+ return NULL;
+
return pmd;
}
struct address_space *mapping = NULL;
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
+ spinlock_t *old_ptl, *new_ptl;
+ unsigned long old_start;
+ old_start = old_addr;
+ mmu_notifier_invalidate_range_start(vma->vm_mm,
+ old_start, old_end);
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
+ * moving file-based ptes, we must lock truncate_pagecache
+ * out, since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
new_vma->vm_truncate_count = 0;
}
- spin_lock(&mm->page_table_lock);
- old_pte = pte_offset_map(old_pmd, old_addr);
- new_pte = pte_offset_map_nested(new_pmd, new_addr);
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * pte locks because exclusive mmap_sem prevents deadlock.
+ */
+ old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
+ new_pte = pte_offset_map_nested(new_pmd, new_addr);
+ new_ptl = pte_lockptr(mm, new_pmd);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+ arch_enter_lazy_mmu_mode();
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
new_pte++, new_addr += PAGE_SIZE) {
if (pte_none(*old_pte))
continue;
pte = ptep_clear_flush(vma, old_addr, old_pte);
- /* ZERO_PAGE can be dependant on virtual addr */
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
set_pte_at(mm, new_addr, new_pte, pte);
}
+ arch_leave_lazy_mmu_mode();
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
pte_unmap_nested(new_pte - 1);
- pte_unmap(old_pte - 1);
- spin_unlock(&mm->page_table_lock);
+ pte_unmap_unlock(old_pte - 1, old_ptl);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
+ mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
}
#define LATENCY_LIMIT (64 * PAGE_SIZE)
-static unsigned long move_page_tables(struct vm_area_struct *vma,
+unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len)
{
unsigned long excess = 0;
unsigned long hiwater_vm;
int split = 0;
+ int err;
/*
* We'd prefer to avoid failure later on in do_munmap:
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
+ /*
+ * Advise KSM to break any KSM pages in the area to be moved:
+ * it would be confusing if they were to turn up at the new
+ * location, where they happen to coincide with different KSM
+ * pages recently unmapped. But leave vma->vm_flags as it was,
+ * so KSM can come around to merge on vma and new_vma afterwards.
+ */
+ err = ksm_madvise(vma, old_addr, old_addr + old_len,
+ MADV_UNMERGEABLE, &vm_flags);
+ if (err)
+ return err;
+
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
if (!new_vma)
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
if (new_len > old_len)
- make_pages_present(new_addr + old_len,
- new_addr + new_len);
+ mlock_vma_pages_range(new_vma, new_addr + old_len,
+ new_addr + new_len);
}
return new_addr;
if ((addr <= new_addr) && (addr+old_len) > new_addr)
goto out;
+ ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+ if (ret)
+ goto out;
+
ret = do_munmap(mm, new_addr, new_len);
if (ret)
goto out;
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
goto out;
- if (vma->vm_flags & VM_DONTEXPAND) {
+ if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
if (new_len > old_len)
goto out;
}
vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
if (vma->vm_flags & VM_LOCKED) {
mm->locked_vm += pages;
- make_pages_present(addr + old_len,
+ mlock_vma_pages_range(vma, addr + old_len,
addr + new_len);
}
ret = addr;
new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
vma->vm_pgoff, map_flags);
- ret = new_addr;
- if (new_addr & ~PAGE_MASK)
+ if (new_addr & ~PAGE_MASK) {
+ ret = new_addr;
+ goto out;
+ }
+
+ ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+ if (ret)
goto out;
}
ret = move_vma(vma, addr, old_len, new_len, new_addr);
return ret;
}
-asmlinkage unsigned long sys_mremap(unsigned long addr,
- unsigned long old_len, unsigned long new_len,
- unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+ unsigned long, new_len, unsigned long, flags,
+ unsigned long, new_addr)
{
unsigned long ret;