X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;ds=sidebyside;f=mm%2Fmemory.c;h=baa999e87cd21d2f9e67c8e3b1093bb6831513fb;hb=6644107d57a8fa82b47e4c55da4d9d91a612f29c;hp=3f8fa06b963b281191ac2c1350af0b21b2dad7c5;hpb=4779280d1ea4d361af13ae77ba55217fbcd16d4c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/memory.c b/mm/memory.c index 3f8fa06..baa999e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1511,6 +1511,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { int ret; + pgprot_t pgprot = vma->vm_page_prot; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1525,10 +1526,10 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; - if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE)) + if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE)) return -EINVAL; - ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot); + ret = insert_pfn(vma, addr, pfn, pgprot); if (ret) untrack_pfn_vma(vma, pfn, PAGE_SIZE); @@ -1671,9 +1672,15 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size)); - if (err) + err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size)); + if (err) { + /* + * To indicate that track_pfn related cleanup is not + * needed from higher level routine calling unmap_vmas + */ + vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); return -EINVAL; + } BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; @@ -1992,7 +1999,7 @@ gotten: * Don't let another task, with possibly unlocked vma, * keep the mlocked page. */ - if (vma->vm_flags & VM_LOCKED) { + if ((vma->vm_flags & VM_LOCKED) && old_page) { lock_page(old_page); /* for LRU manipulation */ clear_page_mlock(old_page); unlock_page(old_page); @@ -2000,7 +2007,7 @@ gotten: cow_user_page(new_page, old_page, address, vma); __SetPageUptodate(new_page); - if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; /* @@ -2392,6 +2399,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; swp_entry_t entry; pte_t pte; + struct mem_cgroup *ptr = NULL; int ret = 0; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) @@ -2430,7 +2438,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { + if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { ret = VM_FAULT_OOM; unlock_page(page); goto out; @@ -2448,7 +2456,19 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_nomap; } - /* The page isn't present yet, go ahead with the fault. */ + /* + * The page isn't present yet, go ahead with the fault. + * + * Be careful about the sequence of operations here. + * To get its accounting right, reuse_swap_page() must be called + * while the page is counted on swap but not yet in mapcount i.e. + * before page_add_anon_rmap() and swap_free(); try_to_free_swap() + * must be called after the swap_free(), or it will never succeed. + * Because delete_from_swap_page() may be called by reuse_swap_page(), + * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry + * in page->private. In this case, a record in swap_cgroup is silently + * discarded at swap_free(). + */ inc_mm_counter(mm, anon_rss); pte = mk_pte(page, vma->vm_page_prot); @@ -2456,10 +2476,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; } - flush_icache_page(vma, page); set_pte_at(mm, address, page_table, pte); page_add_anon_rmap(page, vma, address); + /* It's better to call commit-charge after rmap is established */ + mem_cgroup_commit_charge_swapin(page, ptr); swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) @@ -2480,7 +2501,7 @@ unlock: out: return ret; out_nomap: - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge_swapin(ptr); pte_unmap_unlock(page_table, ptl); unlock_page(page); page_cache_release(page); @@ -2510,7 +2531,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, goto oom; __SetPageUptodate(page); - if (mem_cgroup_charge(page, mm, GFP_KERNEL)) + if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2601,7 +2622,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = VM_FAULT_OOM; goto out; } - if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { + if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) { ret = VM_FAULT_OOM; page_cache_release(page); goto out; @@ -3151,6 +3172,15 @@ void print_vma_addr(char *prefix, unsigned long ip) #ifdef CONFIG_PROVE_LOCKING void might_fault(void) { + /* + * Some code (nfs/sunrpc) uses socket ops on kernel memory while + * holding the mmap_sem, this is safe because kernel memory doesn't + * get paged out, therefore we'll never actually fault, and the + * below annotations will generate false positives. + */ + if (segment_eq(get_fs(), KERNEL_DS)) + return; + might_sleep(); /* * it would be nicer only to annotate paths which are not under