* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
-static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr)
{
pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
- pte_free_tlb(tlb, token);
+ pte_free_tlb(tlb, token, addr);
tlb->mm->nr_ptes--;
}
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
- free_pte_range(tlb, pmd);
+ free_pte_range(tlb, pmd, addr);
} while (pmd++, addr = next, addr != end);
start &= PUD_MASK;
pmd = pmd_offset(pud, start);
pud_clear(pud);
- pmd_free_tlb(tlb, pmd);
+ pmd_free_tlb(tlb, pmd, start);
}
static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
pud = pud_offset(pgd, start);
pgd_clear(pgd);
- pud_free_tlb(tlb, pud);
+ pud_free_tlb(tlb, pud, start);
}
/*
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, int len, int flags,
- struct page **pages, struct vm_area_struct **vmas)
+ unsigned long start, int nr_pages, int flags,
+ struct page **pages, struct vm_area_struct **vmas)
{
int i;
unsigned int vm_flags = 0;
int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
- if (len <= 0)
+ if (nr_pages <= 0)
return 0;
/*
* Require read or write permissions.
vmas[i] = gate_vma;
i++;
start += PAGE_SIZE;
- len--;
+ nr_pages--;
continue;
}
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
- &start, &len, i, write);
+ &start, &nr_pages, i, write);
continue;
}
cond_resched();
while (!(page = follow_page(vma, start, foll_flags))) {
int ret;
+
ret = handle_mm_fault(mm, vma, start,
- foll_flags & FOLL_WRITE);
+ (foll_flags & FOLL_WRITE) ?
+ FAULT_FLAG_WRITE : 0);
+
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return i ? i : -ENOMEM;
vmas[i] = vma;
i++;
start += PAGE_SIZE;
- len--;
- } while (len && start < vma->vm_end);
- } while (len);
+ nr_pages--;
+ } while (nr_pages && start < vma->vm_end);
+ } while (nr_pages);
return i;
}
* @tsk: task_struct of target task
* @mm: mm_struct of target mm
* @start: starting user address
- * @len: number of pages from start to pin
+ * @nr_pages: number of pages from start to pin
* @write: whether pages will be written to by the caller
* @force: whether to force write access even if user mapping is
* readonly. This will result in the page being COWed even
* Or NULL if the caller does not require them.
*
* Returns number of pages pinned. This may be fewer than the number
- * requested. If len is 0 or negative, returns 0. If no pages
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno. Each page returned must be released
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
* See also get_user_pages_fast, for performance critical applications.
*/
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, int len, int write, int force,
+ unsigned long start, int nr_pages, int write, int force,
struct page **pages, struct vm_area_struct **vmas)
{
int flags = 0;
if (force)
flags |= GUP_FLAGS_FORCE;
- return __get_user_pages(tsk, mm,
- start, len, flags,
- pages, vmas);
+ return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
}
EXPORT_SYMBOL(get_user_pages);
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
- int write_access, pte_t orig_pte)
+ unsigned int flags, pte_t orig_pte)
{
spinlock_t *ptl;
struct page *page;
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry);
if (!page) {
- grab_swap_token(); /* Contend for token _before_ read-in */
+ grab_swap_token(mm); /* Contend for token _before_ read-in */
page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, address);
if (!page) {
inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
- if (write_access && reuse_swap_page(page)) {
+ if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
- write_access = 0;
+ flags &= ~FAULT_FLAG_WRITE;
}
flush_icache_page(vma, page);
set_pte_at(mm, address, page_table, pte);
try_to_free_swap(page);
unlock_page(page);
- if (write_access) {
+ if (flags & FAULT_FLAG_WRITE) {
ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
if (ret & VM_FAULT_ERROR)
ret &= VM_FAULT_ERROR;
*/
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
- int write_access)
+ unsigned int flags)
{
struct page *page;
spinlock_t *ptl;
* due to the bad i386 page protection. But it's valid
* for other architectures too.
*
- * Note that if write_access is true, we either now have
+ * Note that if FAULT_FLAG_WRITE is set, we either now have
* an exclusive copy of the page, or this is a shared mapping,
* so we can make it writable and dirty to avoid having to
* handle that later.
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
- int write_access, pte_t orig_pte)
+ unsigned int flags, pte_t orig_pte)
{
pgoff_t pgoff = (((address & PAGE_MASK)
- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
pte_unmap(page_table);
return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
- int write_access, pte_t orig_pte)
+ unsigned int flags, pte_t orig_pte)
{
- unsigned int flags = FAULT_FLAG_NONLINEAR |
- (write_access ? FAULT_FLAG_WRITE : 0);
pgoff_t pgoff;
+ flags |= FAULT_FLAG_NONLINEAR;
+
if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
return 0;
*/
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
- pte_t *pte, pmd_t *pmd, int write_access)
+ pte_t *pte, pmd_t *pmd, unsigned int flags)
{
pte_t entry;
spinlock_t *ptl;
if (vma->vm_ops) {
if (likely(vma->vm_ops->fault))
return do_linear_fault(mm, vma, address,
- pte, pmd, write_access, entry);
+ pte, pmd, flags, entry);
}
return do_anonymous_page(mm, vma, address,
- pte, pmd, write_access);
+ pte, pmd, flags);
}
if (pte_file(entry))
return do_nonlinear_fault(mm, vma, address,
- pte, pmd, write_access, entry);
+ pte, pmd, flags, entry);
return do_swap_page(mm, vma, address,
- pte, pmd, write_access, entry);
+ pte, pmd, flags, entry);
}
ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
- if (write_access) {
+ if (flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address,
pte, pmd, ptl, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
- if (ptep_set_access_flags(vma, address, pte, entry, write_access)) {
+ if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
update_mmu_cache(vma, address, entry);
} else {
/*
* This still avoids useless tlb flushes for .text page faults
* with threads.
*/
- if (write_access)
+ if (flags & FAULT_FLAG_WRITE)
flush_tlb_page(vma, address);
}
unlock:
* By the time we get here, we already hold the mm semaphore
*/
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, int write_access)
+ unsigned long address, unsigned int flags)
{
pgd_t *pgd;
pud_t *pud;
count_vm_event(PGFAULT);
if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, write_access);
+ return hugetlb_fault(mm, vma, address, flags);
pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address);
if (!pte)
return VM_FAULT_OOM;
- return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+ return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
#ifndef __PAGETABLE_PUD_FOLDED
return -EINVAL;
}
+/**
+ * follow_pfn - look up PFN at a user virtual address
+ * @vma: memory mapping
+ * @address: user virtual address
+ * @pfn: location to store found PFN
+ *
+ * Only IO mappings and raw PFN mappings are allowed.
+ *
+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ */
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+ unsigned long *pfn)
+{
+ int ret = -EINVAL;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ return ret;
+
+ ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
+ if (ret)
+ return ret;
+ *pfn = pte_pfn(*ptep);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(follow_pfn);
+
#ifdef CONFIG_HAVE_IOREMAP_PROT
int follow_phys(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned long *prot, resource_size_t *phys)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ int ret = -EINVAL;
pte_t *ptep, pte;
spinlock_t *ptl;
- resource_size_t phys_addr = 0;
- struct mm_struct *mm = vma->vm_mm;
- int ret = -EINVAL;
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out;
- pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto out;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+ if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
goto out;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto out;
-
- /* We cannot handle huge page PFN maps. Luckily they don't exist. */
- if (pmd_huge(*pmd))
- goto out;
-
- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
- if (!ptep)
- goto out;
-
pte = *ptep;
- if (!pte_present(pte))
- goto unlock;
+
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
- phys_addr = pte_pfn(pte);
- phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
*prot = pgprot_val(pte_pgprot(pte));
- *phys = phys_addr;
- ret = 0;
+ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
+ ret = 0;
unlock:
pte_unmap_unlock(ptep, ptl);
out: