X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fmlock.c;h=2b8335a8940052874a18a9b3657c830c61e13284;hb=de3fab39348dff18c69a0cd04efee9c276a02f51;hp=008ea70b7afa9a2baed56cdc9b6b43ef0b05a6ff;hpb=9978ad583e100945b74e4f33e73317983ea32df9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/mlock.c b/mm/mlock.c index 008ea70..2b8335a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -31,7 +31,6 @@ int can_do_mlock(void) } EXPORT_SYMBOL(can_do_mlock); -#ifdef CONFIG_UNEVICTABLE_LRU /* * Mlocked pages are marked with PageMlocked() flag for efficient testing * in vmscan and, possibly, the fault path; and to support semi-accurate @@ -66,14 +65,10 @@ void __clear_page_mlock(struct page *page) putback_lru_page(page); } else { /* - * Page not on the LRU yet. Flush all pagevecs and retry. + * We lost the race. the page already moved to evictable list. */ - lru_add_drain_all(); - if (!isolate_lru_page(page)) - putback_lru_page(page); - else if (PageUnevictable(page)) + if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); - } } @@ -93,25 +88,22 @@ void mlock_vma_page(struct page *page) } } -/* - * called from munlock()/munmap() path with page supposedly on the LRU. +/** + * munlock_vma_page - munlock a vma page + * @page - page to be unlocked * - * Note: unlike mlock_vma_page(), we can't just clear the PageMlocked - * [in try_to_munlock()] and then attempt to isolate the page. We must - * isolate the page to keep others from messing with its unevictable - * and mlocked state while trying to munlock. However, we pre-clear the - * mlocked state anyway as we might lose the isolation race and we might - * not get another chance to clear PageMlocked. If we successfully - * isolate the page and try_to_munlock() detects other VM_LOCKED vmas - * mapping the page, it will restore the PageMlocked state, unless the page - * is mapped in a non-linear vma. So, we go ahead and SetPageMlocked(), - * perhaps redundantly. - * If we lose the isolation race, and the page is mapped by other VM_LOCKED - * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap() - * either of which will restore the PageMlocked state by calling - * mlock_vma_page() above, if it can grab the vma's mmap sem. + * called from munlock()/munmap() path with page supposedly on the LRU. + * When we munlock a page, because the vma where we found the page is being + * munlock()ed or munmap()ed, we want to check whether other vmas hold the + * page locked so that we can leave it on the unevictable lru list and not + * bother vmscan with it. However, to walk the page's rmap list in + * try_to_munlock() we must isolate the page from the LRU. If some other + * task has removed the page from the LRU, we won't be able to do that. + * So we clear the PageMlocked as we might not get another chance. If we + * can't isolate the page, we leave it for putback_lru_page() and vmscan + * [page_referenced()/try_to_unmap()] to deal with. */ -static void munlock_vma_page(struct page *page) +void munlock_vma_page(struct page *page) { BUG_ON(!PageLocked(page)); @@ -122,18 +114,18 @@ static void munlock_vma_page(struct page *page) /* * did try_to_unlock() succeed or punt? */ - if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN) + if (ret != SWAP_MLOCK) count_vm_event(UNEVICTABLE_PGMUNLOCKED); putback_lru_page(page); } else { /* - * We lost the race. let try_to_unmap() deal - * with it. At least we get the page state and - * mlock stats right. However, page is still on - * the noreclaim list. We'll fix that up when - * the page is eventually freed or we scan the - * noreclaim list. + * Some other task has removed the page from the LRU. + * putback_lru_page() will take care of removing the + * page from the unevictable list, if necessary. + * vmscan [page_referenced()] will move the page back + * to the unevictable list if some other vma has it + * mlocked. */ if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); @@ -144,50 +136,36 @@ static void munlock_vma_page(struct page *page) } /** - * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. + * __mlock_vma_pages_range() - mlock a range of pages in the vma. * @vma: target vma * @start: start address * @end: end address - * @mlock: 0 indicate munlock, otherwise mlock. * - * If @mlock == 0, unlock an mlocked range; - * else mlock the range of pages. This takes care of making the pages present , - * too. + * This takes care of making the pages present too. * * return 0 on success, negative error code on error. * * vma->vm_mm->mmap_sem must be held for at least read. */ static long __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - int mlock) + unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; struct page *pages[16]; /* 16 gives a reasonable batch */ int nr_pages = (end - start) / PAGE_SIZE; - int ret; - int gup_flags = 0; + int ret = 0; + int gup_flags; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(end & ~PAGE_MASK); VM_BUG_ON(start < vma->vm_start); VM_BUG_ON(end > vma->vm_end); - VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && - (atomic_read(&mm->mm_users) != 0)); - - /* - * mlock: don't page populate if page has PROT_NONE permission. - * munlock: the pages always do munlock althrough - * its has PROT_NONE permission. - */ - if (!mlock) - gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS; + VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); + gup_flags = FOLL_TOUCH | FOLL_GET; if (vma->vm_flags & VM_WRITE) - gup_flags |= GUP_FLAGS_WRITE; - - lru_add_drain_all(); /* push cached pages to LRU */ + gup_flags |= FOLL_WRITE; while (nr_pages > 0) { int i; @@ -207,53 +185,45 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, * This can happen for, e.g., VM_NONLINEAR regions before * a page has been allocated and mapped at a given offset, * or for addresses that map beyond end of a file. - * We'll mlock the the pages if/when they get faulted in. + * We'll mlock the pages if/when they get faulted in. */ if (ret < 0) break; - if (ret == 0) { - /* - * We know the vma is there, so the only time - * we cannot get a single page should be an - * error (ret < 0) case. - */ - WARN_ON(1); - break; - } lru_add_drain(); /* push cached pages to LRU */ for (i = 0; i < ret; i++) { struct page *page = pages[i]; - lock_page(page); - /* - * Because we lock page here and migration is blocked - * by the elevated reference, we need only check for - * page truncation (file-cache only). - */ if (page->mapping) { - if (mlock) + /* + * That preliminary check is mainly to avoid + * the pointless overhead of lock_page on the + * ZERO_PAGE: which might bounce very badly if + * there is contention. However, we're still + * dirtying its cacheline with get/put_page: + * we'll add another __get_user_pages flag to + * avoid it if that case turns out to matter. + */ + lock_page(page); + /* + * Because we lock page here and migration is + * blocked by the elevated reference, we need + * only check for file-cache page truncation. + */ + if (page->mapping) mlock_vma_page(page); - else - munlock_vma_page(page); + unlock_page(page); } - unlock_page(page); - put_page(page); /* ref from get_user_pages() */ - - /* - * here we assume that get_user_pages() has given us - * a list of virtually contiguous pages. - */ - addr += PAGE_SIZE; /* for next get_user_pages() */ - nr_pages--; + put_page(page); /* ref from get_user_pages() */ } + + addr += ret * PAGE_SIZE; + nr_pages -= ret; ret = 0; } - lru_add_drain_all(); /* to update stats */ - - return ret; /* count entire vma as locked_vm */ + return ret; /* 0 or negative error code */ } /* @@ -268,27 +238,6 @@ static int __mlock_posix_error_return(long retval) return retval; } -#else /* CONFIG_UNEVICTABLE_LRU */ - -/* - * Just make pages present if VM_LOCKED. No-op if unlocking. - */ -static long __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - int mlock) -{ - if (mlock && (vma->vm_flags & VM_LOCKED)) - return make_pages_present(start, end); - return 0; -} - -static inline int __mlock_posix_error_return(long retval) -{ - return 0; -} - -#endif /* CONFIG_UNEVICTABLE_LRU */ - /** * mlock_vma_pages_range() - mlock pages in specified vma range. * @vma - the vma containing the specfied address range @@ -301,14 +250,10 @@ static inline int __mlock_posix_error_return(long retval) * * return number of pages [> 0] to be removed from locked_vm on success * of "special" vmas. - * - * return negative error if vma spanning @start-@range disappears while - * mmap semaphore is dropped. Unlikely? */ long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct mm_struct *mm = vma->vm_mm; int nr_pages = (end - start) / PAGE_SIZE; BUG_ON(!(vma->vm_flags & VM_LOCKED)); @@ -321,20 +266,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))) { - long error; - downgrade_write(&mm->mmap_sem); - error = __mlock_vma_pages_range(vma, start, end, 1); + __mlock_vma_pages_range(vma, start, end); - up_read(&mm->mmap_sem); - /* vma can change or disappear */ - down_write(&mm->mmap_sem); - vma = find_vma(mm, start); - /* non-NULL vma must contain @start, but need to check @end */ - if (!vma || end > vma->vm_end) - return -ENOMEM; - - return 0; /* hide other errors from mmap(), et al */ + /* Hide errors from mmap() and other callers */ + return 0; } /* @@ -352,7 +288,6 @@ no_mlock: return nr_pages; /* error or pages NOT mlocked */ } - /* * munlock_vma_pages_range() - munlock all pages in the vma range.' * @vma - vma containing range to be munlock()ed. @@ -372,10 +307,38 @@ no_mlock: * free them. This will result in freeing mlocked pages. */ void munlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end) { + unsigned long addr; + + lru_add_drain(); vma->vm_flags &= ~VM_LOCKED; - __mlock_vma_pages_range(vma, start, end, 0); + + for (addr = start; addr < end; addr += PAGE_SIZE) { + struct page *page; + /* + * Although FOLL_DUMP is intended for get_dump_page(), + * it just so happens that its special treatment of the + * ZERO_PAGE (returning an error instead of doing get_page) + * suits munlock very well (and if somehow an abnormal page + * has sneaked into the range, we won't oops here: great). + */ + page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); + if (page && !IS_ERR(page)) { + lock_page(page); + /* + * Like in __mlock_vma_pages_range(), + * because we lock page here and migration is + * blocked by the elevated reference, we need + * only check for file-cache page truncation. + */ + if (page->mapping) + munlock_vma_page(page); + unlock_page(page); + put_page(page); + } + cond_resched(); + } } /* @@ -442,45 +405,14 @@ success: * It's okay if try_to_unmap_one unmaps a page just after we * set VM_LOCKED, __mlock_vma_pages_range will bring it back. */ - vma->vm_flags = newflags; if (lock) { - /* - * mmap_sem is currently held for write. Downgrade the write - * lock to a read lock so that other faults, mmap scans, ... - * while we fault in all pages. - */ - downgrade_write(&mm->mmap_sem); - - ret = __mlock_vma_pages_range(vma, start, end, 1); - - /* - * Need to reacquire mmap sem in write mode, as our callers - * expect this. We have no support for atomically upgrading - * a sem to write, so we need to check for ranges while sem - * is unlocked. - */ - up_read(&mm->mmap_sem); - /* vma can change or disappear */ - down_write(&mm->mmap_sem); - *prev = find_vma(mm, start); - /* non-NULL *prev must contain @start, but need to check @end */ - if (!(*prev) || end > (*prev)->vm_end) - ret = -ENOMEM; - else if (ret > 0) { - mm->locked_vm -= ret; - ret = 0; - } else - ret = __mlock_posix_error_return(ret); /* translate if needed */ + vma->vm_flags = newflags; + ret = __mlock_vma_pages_range(vma, start, end); + if (ret < 0) + ret = __mlock_posix_error_return(ret); } else { - /* - * TODO: for unlocking, pages will already be resident, so - * we don't need to wait for allocations/reclaim/pagein, ... - * However, unlocking a very large region can still take a - * while. Should we downgrade the semaphore for both lock - * AND unlock ? - */ - __mlock_vma_pages_range(vma, start, end, 0); + munlock_vma_pages_range(vma, start, end); } out: @@ -537,7 +469,7 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -asmlinkage long sys_mlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { unsigned long locked; unsigned long lock_limit; @@ -546,6 +478,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) if (!can_do_mlock()) return -EPERM; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -563,7 +497,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) return error; } -asmlinkage long sys_munlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; @@ -600,7 +534,7 @@ out: return 0; } -asmlinkage long sys_mlockall(int flags) +SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret = -EINVAL; @@ -612,6 +546,8 @@ asmlinkage long sys_mlockall(int flags) if (!can_do_mlock()) goto out; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; @@ -626,7 +562,7 @@ out: return ret; } -asmlinkage long sys_munlockall(void) +SYSCALL_DEFINE0(munlockall) { int ret; @@ -671,3 +607,44 @@ void user_shm_unlock(size_t size, struct user_struct *user) spin_unlock(&shmlock_user_lock); free_uid(user); } + +int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size) +{ + unsigned long lim, vm, pgsz; + int error = -ENOMEM; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(&mm->mmap_sem); + + lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = mm->total_vm + pgsz; + if (lim < vm) + goto out; + + lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = mm->locked_vm + pgsz; + if (lim < vm) + goto out; + + mm->total_vm += pgsz; + mm->locked_vm += pgsz; + + error = 0; + out: + up_write(&mm->mmap_sem); + return error; +} + +void refund_locked_memory(struct mm_struct *mm, size_t size) +{ + unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(&mm->mmap_sem); + + mm->total_vm -= pgsz; + mm->locked_vm -= pgsz; + + up_write(&mm->mmap_sem); +}