X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fmlock.c;h=cbe9e0581b75dcaf06335ccc017a68789d6247d2;hb=0b7f569e45bb6be142d87017030669a6a7d327a1;hp=8746fe3f973040e4447f880d853d3b8cb49419ec;hpb=b291f000393f5a0b679012b39d79fbc85c018233;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/mlock.c b/mm/mlock.c index 8746fe3..cbe9e05 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -60,15 +60,16 @@ void __clear_page_mlock(struct page *page) return; } + dec_zone_page_state(page, NR_MLOCK); + count_vm_event(UNEVICTABLE_PGCLEARED); if (!isolate_lru_page(page)) { putback_lru_page(page); } else { /* - * Page not on the LRU yet. Flush all pagevecs and retry. + * We lost the race. the page already moved to evictable list. */ - lru_add_drain_all(); - if (!isolate_lru_page(page)) - putback_lru_page(page); + if (PageUnevictable(page)) + count_vm_event(UNEVICTABLE_PGSTRANDED); } } @@ -80,8 +81,12 @@ void mlock_vma_page(struct page *page) { BUG_ON(!PageLocked(page)); - if (!TestSetPageMlocked(page) && !isolate_lru_page(page)) - putback_lru_page(page); + if (!TestSetPageMlocked(page)) { + inc_zone_page_state(page, NR_MLOCK); + count_vm_event(UNEVICTABLE_PGMLOCKED); + if (!isolate_lru_page(page)) + putback_lru_page(page); + } } /* @@ -106,34 +111,78 @@ static void munlock_vma_page(struct page *page) { BUG_ON(!PageLocked(page)); - if (TestClearPageMlocked(page) && !isolate_lru_page(page)) { - try_to_munlock(page); - putback_lru_page(page); + if (TestClearPageMlocked(page)) { + dec_zone_page_state(page, NR_MLOCK); + if (!isolate_lru_page(page)) { + int ret = try_to_munlock(page); + /* + * did try_to_unlock() succeed or punt? + */ + if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN) + count_vm_event(UNEVICTABLE_PGMUNLOCKED); + + putback_lru_page(page); + } else { + /* + * We lost the race. let try_to_unmap() deal + * with it. At least we get the page state and + * mlock stats right. However, page is still on + * the noreclaim list. We'll fix that up when + * the page is eventually freed or we scan the + * noreclaim list. + */ + if (PageUnevictable(page)) + count_vm_event(UNEVICTABLE_PGSTRANDED); + else + count_vm_event(UNEVICTABLE_PGMUNLOCKED); + } } } -/* - * mlock a range of pages in the vma. +/** + * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. + * @vma: target vma + * @start: start address + * @end: end address + * @mlock: 0 indicate munlock, otherwise mlock. + * + * If @mlock == 0, unlock an mlocked range; + * else mlock the range of pages. This takes care of making the pages present , + * too. * - * This takes care of making the pages present too. + * return 0 on success, negative error code on error. * - * vma->vm_mm->mmap_sem must be held for write. + * vma->vm_mm->mmap_sem must be held for at least read. */ -static int __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static long __mlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + int mlock) { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; struct page *pages[16]; /* 16 gives a reasonable batch */ - int write = !!(vma->vm_flags & VM_WRITE); int nr_pages = (end - start) / PAGE_SIZE; - int ret; + int ret = 0; + int gup_flags = 0; - VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); - VM_BUG_ON(start < vma->vm_start || end > vma->vm_end); - VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); + VM_BUG_ON(start & ~PAGE_MASK); + VM_BUG_ON(end & ~PAGE_MASK); + VM_BUG_ON(start < vma->vm_start); + VM_BUG_ON(end > vma->vm_end); + VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && + (atomic_read(&mm->mm_users) != 0)); - lru_add_drain_all(); /* push cached pages to LRU */ + /* + * mlock: don't page populate if vma has PROT_NONE permission. + * munlock: always do munlock although the vma has PROT_NONE + * permission, or SIGKILL is pending. + */ + if (!mlock) + gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS | + GUP_FLAGS_IGNORE_SIGKILL; + + if (vma->vm_flags & VM_WRITE) + gup_flags |= GUP_FLAGS_WRITE; while (nr_pages > 0) { int i; @@ -146,9 +195,9 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma, * disable migration of this page. However, page may * still be truncated out from under us. */ - ret = get_user_pages(current, mm, addr, + ret = __get_user_pages(current, mm, addr, min_t(int, nr_pages, ARRAY_SIZE(pages)), - write, 0, pages, NULL); + gup_flags, pages, NULL); /* * This can happen for, e.g., VM_NONLINEAR regions before * a page has been allocated and mapped at a given offset, @@ -178,8 +227,12 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma, * by the elevated reference, we need only check for * page truncation (file-cache only). */ - if (page->mapping) - mlock_vma_page(page); + if (page->mapping) { + if (mlock) + mlock_vma_page(page); + else + munlock_vma_page(page); + } unlock_page(page); put_page(page); /* ref from get_user_pages() */ @@ -190,104 +243,22 @@ static int __mlock_vma_pages_range(struct vm_area_struct *vma, addr += PAGE_SIZE; /* for next get_user_pages() */ nr_pages--; } + ret = 0; } - lru_add_drain_all(); /* to update stats */ - - return 0; /* count entire vma as locked_vm */ -} - -/* - * private structure for munlock page table walk - */ -struct munlock_page_walk { - struct vm_area_struct *vma; - pmd_t *pmd; /* for migration_entry_wait() */ -}; - -/* - * munlock normal pages for present ptes - */ -static int __munlock_pte_handler(pte_t *ptep, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - struct munlock_page_walk *mpw = walk->private; - swp_entry_t entry; - struct page *page; - pte_t pte; - -retry: - pte = *ptep; - /* - * If it's a swap pte, we might be racing with page migration. - */ - if (unlikely(!pte_present(pte))) { - if (!is_swap_pte(pte)) - goto out; - entry = pte_to_swp_entry(pte); - if (is_migration_entry(entry)) { - migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr); - goto retry; - } - goto out; - } - - page = vm_normal_page(mpw->vma, addr, pte); - if (!page) - goto out; - - lock_page(page); - if (!page->mapping) { - unlock_page(page); - goto retry; - } - munlock_vma_page(page); - unlock_page(page); - -out: - return 0; + return ret; /* count entire vma as locked_vm */ } /* - * Save pmd for pte handler for waiting on migration entries - */ -static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - struct munlock_page_walk *mpw = walk->private; - - mpw->pmd = pmd; - return 0; -} - - -/* - * munlock a range of pages in the vma using standard page table walk. - * - * vma->vm_mm->mmap_sem must be held for write. + * convert get_user_pages() return value to posix mlock() error */ -static void __munlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static int __mlock_posix_error_return(long retval) { - struct mm_struct *mm = vma->vm_mm; - struct munlock_page_walk mpw = { - .vma = vma, - }; - struct mm_walk munlock_page_walk = { - .pmd_entry = __munlock_pmd_handler, - .pte_entry = __munlock_pte_handler, - .private = &mpw, - .mm = mm, - }; - - VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); - VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); - VM_BUG_ON(start < vma->vm_start); - VM_BUG_ON(end > vma->vm_end); - - lru_add_drain_all(); /* push cached pages to LRU */ - walk_page_range(start, end, &munlock_page_walk); - lru_add_drain_all(); /* to update stats */ + if (retval == -EFAULT) + retval = -ENOMEM; + else if (retval == -ENOMEM) + retval = -EAGAIN; + return retval; } #else /* CONFIG_UNEVICTABLE_LRU */ @@ -295,27 +266,36 @@ static void __munlock_vma_pages_range(struct vm_area_struct *vma, /* * Just make pages present if VM_LOCKED. No-op if unlocking. */ -static int __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static long __mlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + int mlock) { - if (vma->vm_flags & VM_LOCKED) - make_pages_present(start, end); + if (mlock && (vma->vm_flags & VM_LOCKED)) + return make_pages_present(start, end); return 0; } -/* - * munlock a range of pages in the vma -- no-op. - */ -static void __munlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline int __mlock_posix_error_return(long retval) { + return 0; } + #endif /* CONFIG_UNEVICTABLE_LRU */ -/* - * mlock all pages in this vma range. For mmap()/mremap()/... +/** + * mlock_vma_pages_range() - mlock pages in specified vma range. + * @vma - the vma containing the specfied address range + * @start - starting address in @vma to mlock + * @end - end address [+1] in @vma to mlock + * + * For mmap()/mremap()/expansion of mlocked vma. + * + * return 0 on success for "normal" vmas. + * + * return number of pages [> 0] to be removed from locked_vm on success + * of "special" vmas. */ -int mlock_vma_pages_range(struct vm_area_struct *vma, +long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { int nr_pages = (end - start) / PAGE_SIZE; @@ -329,8 +309,13 @@ int mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) - return __mlock_vma_pages_range(vma, start, end); + vma == get_gate_vma(current))) { + + __mlock_vma_pages_range(vma, start, end, 1); + + /* Hide errors from mmap() and other callers */ + return 0; + } /* * User mapped kernel pages or huge pages: @@ -344,17 +329,33 @@ int mlock_vma_pages_range(struct vm_area_struct *vma, no_mlock: vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */ - return nr_pages; /* pages NOT mlocked */ + return nr_pages; /* error or pages NOT mlocked */ } /* - * munlock all pages in vma. For munmap() and exit(). + * munlock_vma_pages_range() - munlock all pages in the vma range.' + * @vma - vma containing range to be munlock()ed. + * @start - start address in @vma of the range + * @end - end of range in @vma. + * + * For mremap(), munmap() and exit(). + * + * Called with @vma VM_LOCKED. + * + * Returns with VM_LOCKED cleared. Callers must be prepared to + * deal with this. + * + * We don't save and restore VM_LOCKED here because pages are + * still on lru. In unmap path, pages might be scanned by reclaim + * and re-mlocked by try_to_{munlock|unmap} before we unmap and + * free them. This will result in freeing mlocked pages. */ -void munlock_vma_pages_all(struct vm_area_struct *vma) +void munlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { vma->vm_flags &= ~VM_LOCKED; - __munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); + __mlock_vma_pages_range(vma, start, end, 0); } /* @@ -424,13 +425,16 @@ success: vma->vm_flags = newflags; if (lock) { - ret = __mlock_vma_pages_range(vma, start, end); + ret = __mlock_vma_pages_range(vma, start, end, 1); + if (ret > 0) { mm->locked_vm -= ret; ret = 0; - } - } else - __munlock_vma_pages_range(vma, start, end); + } else + ret = __mlock_posix_error_return(ret); /* translate if needed */ + } else { + __mlock_vma_pages_range(vma, start, end, 0); + } out: *prev = vma; @@ -486,7 +490,7 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -asmlinkage long sys_mlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { unsigned long locked; unsigned long lock_limit; @@ -495,6 +499,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) if (!can_do_mlock()) return -EPERM; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -512,7 +518,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) return error; } -asmlinkage long sys_munlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; @@ -549,7 +555,7 @@ out: return 0; } -asmlinkage long sys_mlockall(int flags) +SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret = -EINVAL; @@ -561,6 +567,8 @@ asmlinkage long sys_mlockall(int flags) if (!can_do_mlock()) goto out; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; @@ -575,7 +583,7 @@ out: return ret; } -asmlinkage long sys_munlockall(void) +SYSCALL_DEFINE0(munlockall) { int ret; @@ -620,3 +628,53 @@ void user_shm_unlock(size_t size, struct user_struct *user) spin_unlock(&shmlock_user_lock); free_uid(user); } + +void *alloc_locked_buffer(size_t size) +{ + unsigned long rlim, vm, pgsz; + void *buffer = NULL; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + goto out; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + goto out; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + out: + up_write(¤t->mm->mmap_sem); + return buffer; +} + +void release_locked_buffer(void *buffer, size_t size) +{ + unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + current->mm->total_vm -= pgsz; + current->mm->locked_vm -= pgsz; + + up_write(¤t->mm->mmap_sem); +} + +void free_locked_buffer(void *buffer, size_t size) +{ + release_locked_buffer(buffer, size); + + kfree(buffer); +}