unsigned long pfn)
{
int ret;
+ pgprot_t pgprot = vma->vm_page_prot;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
- if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+ if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
return -EINVAL;
- ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+ ret = insert_pfn(vma, addr, pfn, pgprot);
if (ret)
untrack_pfn_vma(vma, pfn, PAGE_SIZE);
* behaviour that some programs depend on. We mark the "original"
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
*/
- if (addr == vma->vm_start && end == vma->vm_end)
+ if (addr == vma->vm_start && end == vma->vm_end) {
vma->vm_pgoff = pfn;
- else if (is_cow_mapping(vma->vm_flags))
+ vma->vm_flags |= VM_PFN_AT_MMAP;
+ } else if (is_cow_mapping(vma->vm_flags))
return -EINVAL;
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
- err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
- if (err)
+ err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
+ if (err) {
+ /*
+ * To indicate that track_pfn related cleanup is not
+ * needed from higher level routine calling unmap_vmas
+ */
+ vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
+ vma->vm_flags &= ~VM_PFN_AT_MMAP;
return -EINVAL;
+ }
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
* Don't let another task, with possibly unlocked vma,
* keep the mlocked page.
*/
- if (vma->vm_flags & VM_LOCKED) {
+ if ((vma->vm_flags & VM_LOCKED) && old_page) {
lock_page(old_page); /* for LRU manipulation */
clear_page_mlock(old_page);
unlock_page(old_page);
cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);
- if (mem_cgroup_newpage_charge(new_page, mm, GFP_HIGHUSER_MOVABLE))
+ if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
goto oom_free_new;
/*
lock_page(page);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
- if (mem_cgroup_try_charge_swapin(mm, page,
- GFP_HIGHUSER_MOVABLE, &ptr) == -ENOMEM) {
+ if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
ret = VM_FAULT_OOM;
unlock_page(page);
goto out;
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * And mem_cgroup_commit_charge_swapin(), which uses the swp_entry
- * in page->private, must be called before reuse_swap_page(),
- * which may delete_from_swap_cache().
+ * Because delete_from_swap_page() may be called by reuse_swap_page(),
+ * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
+ * in page->private. In this case, a record in swap_cgroup is silently
+ * discarded at swap_free().
*/
- mem_cgroup_commit_charge_swapin(page, ptr);
inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
write_access = 0;
}
-
flush_icache_page(vma, page);
set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
+ /* It's better to call commit-charge after rmap is established */
+ mem_cgroup_commit_charge_swapin(page, ptr);
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
goto oom;
__SetPageUptodate(page);
- if (mem_cgroup_newpage_charge(page, mm, GFP_HIGHUSER_MOVABLE))
+ if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
ret = VM_FAULT_OOM;
goto out;
}
- if (mem_cgroup_newpage_charge(page,
- mm, GFP_HIGHUSER_MOVABLE)) {
+ if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
ret = VM_FAULT_OOM;
page_cache_release(page);
goto out;
#ifdef CONFIG_PROVE_LOCKING
void might_fault(void)
{
+ /*
+ * Some code (nfs/sunrpc) uses socket ops on kernel memory while
+ * holding the mmap_sem, this is safe because kernel memory doesn't
+ * get paged out, therefore we'll never actually fault, and the
+ * below annotations will generate false positives.
+ */
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return;
+
might_sleep();
/*
* it would be nicer only to annotate paths which are not under