{
if (capable(CAP_IPC_LOCK))
return 1;
- if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
+ if (rlimit(RLIMIT_MEMLOCK) != 0)
return 1;
return 0;
}
EXPORT_SYMBOL(can_do_mlock);
-#ifdef CONFIG_UNEVICTABLE_LRU
/*
* Mlocked pages are marked with PageMlocked() flag for efficient testing
* in vmscan and, possibly, the fault path; and to support semi-accurate
return;
}
+ dec_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGCLEARED);
if (!isolate_lru_page(page)) {
putback_lru_page(page);
} else {
/*
- * Page not on the LRU yet. Flush all pagevecs and retry.
+ * We lost the race. the page already moved to evictable list.
*/
- lru_add_drain_all();
- if (!isolate_lru_page(page))
- putback_lru_page(page);
+ if (PageUnevictable(page))
+ count_vm_event(UNEVICTABLE_PGSTRANDED);
}
}
{
BUG_ON(!PageLocked(page));
- if (!TestSetPageMlocked(page) && !isolate_lru_page(page))
- putback_lru_page(page);
+ if (!TestSetPageMlocked(page)) {
+ inc_zone_page_state(page, NR_MLOCK);
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ if (!isolate_lru_page(page))
+ putback_lru_page(page);
+ }
}
-/*
- * called from munlock()/munmap() path with page supposedly on the LRU.
+/**
+ * munlock_vma_page - munlock a vma page
+ * @page - page to be unlocked
*
- * Note: unlike mlock_vma_page(), we can't just clear the PageMlocked
- * [in try_to_munlock()] and then attempt to isolate the page. We must
- * isolate the page to keep others from messing with its unevictable
- * and mlocked state while trying to munlock. However, we pre-clear the
- * mlocked state anyway as we might lose the isolation race and we might
- * not get another chance to clear PageMlocked. If we successfully
- * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
- * mapping the page, it will restore the PageMlocked state, unless the page
- * is mapped in a non-linear vma. So, we go ahead and SetPageMlocked(),
- * perhaps redundantly.
- * If we lose the isolation race, and the page is mapped by other VM_LOCKED
- * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
- * either of which will restore the PageMlocked state by calling
- * mlock_vma_page() above, if it can grab the vma's mmap sem.
+ * called from munlock()/munmap() path with page supposedly on the LRU.
+ * When we munlock a page, because the vma where we found the page is being
+ * munlock()ed or munmap()ed, we want to check whether other vmas hold the
+ * page locked so that we can leave it on the unevictable lru list and not
+ * bother vmscan with it. However, to walk the page's rmap list in
+ * try_to_munlock() we must isolate the page from the LRU. If some other
+ * task has removed the page from the LRU, we won't be able to do that.
+ * So we clear the PageMlocked as we might not get another chance. If we
+ * can't isolate the page, we leave it for putback_lru_page() and vmscan
+ * [page_referenced()/try_to_unmap()] to deal with.
*/
-static void munlock_vma_page(struct page *page)
+void munlock_vma_page(struct page *page)
{
BUG_ON(!PageLocked(page));
- if (TestClearPageMlocked(page) && !isolate_lru_page(page)) {
- try_to_munlock(page);
- putback_lru_page(page);
+ if (TestClearPageMlocked(page)) {
+ dec_zone_page_state(page, NR_MLOCK);
+ if (!isolate_lru_page(page)) {
+ int ret = try_to_munlock(page);
+ /*
+ * did try_to_unlock() succeed or punt?
+ */
+ if (ret != SWAP_MLOCK)
+ count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+ putback_lru_page(page);
+ } else {
+ /*
+ * Some other task has removed the page from the LRU.
+ * putback_lru_page() will take care of removing the
+ * page from the unevictable list, if necessary.
+ * vmscan [page_referenced()] will move the page back
+ * to the unevictable list if some other vma has it
+ * mlocked.
+ */
+ if (PageUnevictable(page))
+ count_vm_event(UNEVICTABLE_PGSTRANDED);
+ else
+ count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ }
}
}
/**
- * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.
+ * __mlock_vma_pages_range() - mlock a range of pages in the vma.
* @vma: target vma
* @start: start address
* @end: end address
- * @mlock: 0 indicate munlock, otherwise mlock.
*
- * If @mlock == 0, unlock an mlocked range;
- * else mlock the range of pages. This takes care of making the pages present ,
- * too.
+ * This takes care of making the pages present too.
*
* return 0 on success, negative error code on error.
*
* vma->vm_mm->mmap_sem must be held for at least read.
*/
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- int mlock)
+ unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long addr = start;
struct page *pages[16]; /* 16 gives a reasonable batch */
int nr_pages = (end - start) / PAGE_SIZE;
- int ret;
- int gup_flags = 0;
+ int ret = 0;
+ int gup_flags;
VM_BUG_ON(start & ~PAGE_MASK);
VM_BUG_ON(end & ~PAGE_MASK);
VM_BUG_ON(start < vma->vm_start);
VM_BUG_ON(end > vma->vm_end);
- VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
- (atomic_read(&mm->mm_users) != 0));
-
- /*
- * mlock: don't page populate if page has PROT_NONE permission.
- * munlock: the pages always do munlock althrough
- * its has PROT_NONE permission.
- */
- if (!mlock)
- gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+ VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+ gup_flags = FOLL_TOUCH | FOLL_GET;
if (vma->vm_flags & VM_WRITE)
- gup_flags |= GUP_FLAGS_WRITE;
-
- lru_add_drain_all(); /* push cached pages to LRU */
+ gup_flags |= FOLL_WRITE;
while (nr_pages > 0) {
int i;
* This can happen for, e.g., VM_NONLINEAR regions before
* a page has been allocated and mapped at a given offset,
* or for addresses that map beyond end of a file.
- * We'll mlock the the pages if/when they get faulted in.
+ * We'll mlock the pages if/when they get faulted in.
*/
if (ret < 0)
break;
- if (ret == 0) {
- /*
- * We know the vma is there, so the only time
- * we cannot get a single page should be an
- * error (ret < 0) case.
- */
- WARN_ON(1);
- break;
- }
lru_add_drain(); /* push cached pages to LRU */
for (i = 0; i < ret; i++) {
struct page *page = pages[i];
- lock_page(page);
- /*
- * Because we lock page here and migration is blocked
- * by the elevated reference, we need only check for
- * page truncation (file-cache only).
- */
if (page->mapping) {
- if (mlock)
+ /*
+ * That preliminary check is mainly to avoid
+ * the pointless overhead of lock_page on the
+ * ZERO_PAGE: which might bounce very badly if
+ * there is contention. However, we're still
+ * dirtying its cacheline with get/put_page:
+ * we'll add another __get_user_pages flag to
+ * avoid it if that case turns out to matter.
+ */
+ lock_page(page);
+ /*
+ * Because we lock page here and migration is
+ * blocked by the elevated reference, we need
+ * only check for file-cache page truncation.
+ */
+ if (page->mapping)
mlock_vma_page(page);
- else
- munlock_vma_page(page);
+ unlock_page(page);
}
- unlock_page(page);
- put_page(page); /* ref from get_user_pages() */
-
- /*
- * here we assume that get_user_pages() has given us
- * a list of virtually contiguous pages.
- */
- addr += PAGE_SIZE; /* for next get_user_pages() */
- nr_pages--;
+ put_page(page); /* ref from get_user_pages() */
}
- }
- lru_add_drain_all(); /* to update stats */
+ addr += ret * PAGE_SIZE;
+ nr_pages -= ret;
+ ret = 0;
+ }
- return 0; /* count entire vma as locked_vm */
+ return ret; /* 0 or negative error code */
}
-#else /* CONFIG_UNEVICTABLE_LRU */
-
/*
- * Just make pages present if VM_LOCKED. No-op if unlocking.
+ * convert get_user_pages() return value to posix mlock() error
*/
-static long __mlock_vma_pages_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- int mlock)
+static int __mlock_posix_error_return(long retval)
{
- if (mlock && (vma->vm_flags & VM_LOCKED))
- make_pages_present(start, end);
- return 0;
+ if (retval == -EFAULT)
+ retval = -ENOMEM;
+ else if (retval == -ENOMEM)
+ retval = -EAGAIN;
+ return retval;
}
-#endif /* CONFIG_UNEVICTABLE_LRU */
/**
* mlock_vma_pages_range() - mlock pages in specified vma range.
*
* return number of pages [> 0] to be removed from locked_vm on success
* of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped. Unlikely?
*/
long mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- struct mm_struct *mm = vma->vm_mm;
int nr_pages = (end - start) / PAGE_SIZE;
BUG_ON(!(vma->vm_flags & VM_LOCKED));
if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current))) {
- long error;
- downgrade_write(&mm->mmap_sem);
-
- error = __mlock_vma_pages_range(vma, start, end, 1);
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- vma = find_vma(mm, start);
- /* non-NULL vma must contain @start, but need to check @end */
- if (!vma || end > vma->vm_end)
- return -ENOMEM;
+ __mlock_vma_pages_range(vma, start, end);
- return 0; /* hide other errors from mmap(), et al */
+ /* Hide errors from mmap() and other callers */
+ return 0;
}
/*
return nr_pages; /* error or pages NOT mlocked */
}
-
/*
* munlock_vma_pages_range() - munlock all pages in the vma range.'
* @vma - vma containing range to be munlock()ed.
* free them. This will result in freeing mlocked pages.
*/
void munlock_vma_pages_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end)
{
+ unsigned long addr;
+
+ lru_add_drain();
vma->vm_flags &= ~VM_LOCKED;
- __mlock_vma_pages_range(vma, start, end, 0);
+
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ struct page *page;
+ /*
+ * Although FOLL_DUMP is intended for get_dump_page(),
+ * it just so happens that its special treatment of the
+ * ZERO_PAGE (returning an error instead of doing get_page)
+ * suits munlock very well (and if somehow an abnormal page
+ * has sneaked into the range, we won't oops here: great).
+ */
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
+ if (page && !IS_ERR(page)) {
+ lock_page(page);
+ /*
+ * Like in __mlock_vma_pages_range(),
+ * because we lock page here and migration is
+ * blocked by the elevated reference, we need
+ * only check for file-cache page truncation.
+ */
+ if (page->mapping)
+ munlock_vma_page(page);
+ unlock_page(page);
+ put_page(page);
+ }
+ cond_resched();
+ }
}
/*
* It's okay if try_to_unmap_one unmaps a page just after we
* set VM_LOCKED, __mlock_vma_pages_range will bring it back.
*/
- vma->vm_flags = newflags;
if (lock) {
- /*
- * mmap_sem is currently held for write. Downgrade the write
- * lock to a read lock so that other faults, mmap scans, ...
- * while we fault in all pages.
- */
- downgrade_write(&mm->mmap_sem);
-
- ret = __mlock_vma_pages_range(vma, start, end, 1);
- if (ret > 0) {
- mm->locked_vm -= ret;
- ret = 0;
- }
- /*
- * Need to reacquire mmap sem in write mode, as our callers
- * expect this. We have no support for atomically upgrading
- * a sem to write, so we need to check for ranges while sem
- * is unlocked.
- */
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- *prev = find_vma(mm, start);
- /* non-NULL *prev must contain @start, but need to check @end */
- if (!(*prev) || end > (*prev)->vm_end)
- ret = -ENOMEM;
+ vma->vm_flags = newflags;
+ ret = __mlock_vma_pages_range(vma, start, end);
+ if (ret < 0)
+ ret = __mlock_posix_error_return(ret);
} else {
- /*
- * TODO: for unlocking, pages will already be resident, so
- * we don't need to wait for allocations/reclaim/pagein, ...
- * However, unlocking a very large region can still take a
- * while. Should we downgrade the semaphore for both lock
- * AND unlock ?
- */
- __mlock_vma_pages_range(vma, start, end, 0);
+ munlock_vma_pages_range(vma, start, end);
}
out:
return error;
}
-asmlinkage long sys_mlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
unsigned long locked;
unsigned long lock_limit;
if (!can_do_mlock())
return -EPERM;
+ lru_add_drain_all(); /* flush pagevec */
+
down_write(¤t->mm->mmap_sem);
len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
start &= PAGE_MASK;
locked = len >> PAGE_SHIFT;
locked += current->mm->locked_vm;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
/* check against resource limits */
return error;
}
-asmlinkage long sys_munlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
int ret;
return 0;
}
-asmlinkage long sys_mlockall(int flags)
+SYSCALL_DEFINE1(mlockall, int, flags)
{
unsigned long lock_limit;
int ret = -EINVAL;
if (!can_do_mlock())
goto out;
+ lru_add_drain_all(); /* flush pagevec */
+
down_write(¤t->mm->mmap_sem);
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
ret = -ENOMEM;
return ret;
}
-asmlinkage long sys_munlockall(void)
+SYSCALL_DEFINE0(munlockall)
{
int ret;
int allowed = 0;
locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
if (lock_limit == RLIM_INFINITY)
allowed = 1;
lock_limit >>= PAGE_SHIFT;