netns xfrm: fix "ip xfrm state|policy count" misreport
[safe/jmp/linux-2.6] / fs / proc / task_mmu.c
index c492449..f277c4a 100644 (file)
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        mm = mm_for_maps(priv->task);
        if (!mm)
                return NULL;
+       down_read(&mm->mmap_sem);
 
        tail_vma = get_gate_vma(priv->task);
        priv->tail_vma = tail_vma;
@@ -198,35 +199,31 @@ static int do_maps_open(struct inode *inode, struct file *file,
        return ret;
 }
 
-static int show_map(struct seq_file *m, void *v)
+static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 {
-       struct proc_maps_private *priv = m->private;
-       struct task_struct *task = priv->task;
-       struct vm_area_struct *vma = v;
        struct mm_struct *mm = vma->vm_mm;
        struct file *file = vma->vm_file;
        int flags = vma->vm_flags;
        unsigned long ino = 0;
+       unsigned long long pgoff = 0;
        dev_t dev = 0;
        int len;
 
-       if (maps_protect && !ptrace_may_attach(task))
-               return -EACCES;
-
        if (file) {
                struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
                dev = inode->i_sb->s_dev;
                ino = inode->i_ino;
+               pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
        }
 
-       seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+       seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
                        vma->vm_start,
                        vma->vm_end,
                        flags & VM_READ ? 'r' : '-',
                        flags & VM_WRITE ? 'w' : '-',
                        flags & VM_EXEC ? 'x' : '-',
                        flags & VM_MAYSHARE ? 's' : 'p',
-                       vma->vm_pgoff << PAGE_SHIFT,
+                       pgoff,
                        MAJOR(dev), MINOR(dev), ino, &len);
 
        /*
@@ -246,6 +243,25 @@ static int show_map(struct seq_file *m, void *v)
                                } else if (vma->vm_start <= mm->start_stack &&
                                           vma->vm_end >= mm->start_stack) {
                                        name = "[stack]";
+                               } else {
+                                       unsigned long stack_start;
+                                       struct proc_maps_private *pmp;
+
+                                       pmp = m->private;
+                                       stack_start = pmp->task->stack_start;
+
+                                       if (vma->vm_start <= stack_start &&
+                                           vma->vm_end >= stack_start) {
+                                               pad_len_spaces(m, len);
+                                               seq_printf(m,
+                                                "[threadstack:%08lx]",
+#ifdef CONFIG_STACK_GROWSUP
+                                                vma->vm_end - stack_start
+#else
+                                                stack_start - vma->vm_start
+#endif
+                                               );
+                                       }
                                }
                        } else {
                                name = "[vdso]";
@@ -257,6 +273,15 @@ static int show_map(struct seq_file *m, void *v)
                }
        }
        seq_putc(m, '\n');
+}
+
+static int show_map(struct seq_file *m, void *v)
+{
+       struct vm_area_struct *vma = v;
+       struct proc_maps_private *priv = m->private;
+       struct task_struct *task = priv->task;
+
+       show_map_vma(m, vma);
 
        if (m->count < m->size)  /* vma is copied successfully */
                m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
@@ -336,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                if (!pte_present(ptent))
                        continue;
 
-               mss->resident += PAGE_SIZE;
-
                page = vm_normal_page(vma, addr, ptent);
                if (!page)
                        continue;
 
+               mss->resident += PAGE_SIZE;
                /* Accumulate the size in pages that have been accessed. */
                if (pte_young(ptent) || PageReferenced(page))
                        mss->referenced += PAGE_SIZE;
@@ -367,9 +391,10 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 static int show_smap(struct seq_file *m, void *v)
 {
+       struct proc_maps_private *priv = m->private;
+       struct task_struct *task = priv->task;
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;
-       int ret;
        struct mm_walk smaps_walk = {
                .pmd_entry = smaps_pte_range,
                .mm = vma->vm_mm,
@@ -381,9 +406,7 @@ static int show_smap(struct seq_file *m, void *v)
        if (vma->vm_mm && !is_vm_hugetlb_page(vma))
                walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
-       ret = show_map(m, v);
-       if (ret)
-               return ret;
+       show_map_vma(m, vma);
 
        seq_printf(m,
                   "Size:           %8lu kB\n"
@@ -394,7 +417,9 @@ static int show_smap(struct seq_file *m, void *v)
                   "Private_Clean:  %8lu kB\n"
                   "Private_Dirty:  %8lu kB\n"
                   "Referenced:     %8lu kB\n"
-                  "Swap:           %8lu kB\n",
+                  "Swap:           %8lu kB\n"
+                  "KernelPageSize: %8lu kB\n"
+                  "MMUPageSize:    %8lu kB\n",
                   (vma->vm_end - vma->vm_start) >> 10,
                   mss.resident >> 10,
                   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -403,9 +428,13 @@ static int show_smap(struct seq_file *m, void *v)
                   mss.private_clean >> 10,
                   mss.private_dirty >> 10,
                   mss.referenced >> 10,
-                  mss.swap >> 10);
+                  mss.swap >> 10,
+                  vma_kernel_pagesize(vma) >> 10,
+                  vma_mmu_pagesize(vma) >> 10);
 
-       return ret;
+       if (m->count < m->size)  /* vma is copied successfully */
+               m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+       return 0;
 }
 
 static const struct seq_operations proc_pid_smaps_op = {
@@ -454,23 +483,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 
+#define CLEAR_REFS_ALL 1 /* value written to clear_refs: walk every non-hugetlb vma */
+#define CLEAR_REFS_ANON 2 /* value written to clear_refs: skip vmas that have a vm_file */
+#define CLEAR_REFS_MAPPED 3 /* value written to clear_refs: skip vmas without a vm_file */
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
        struct task_struct *task;
-       char buffer[PROC_NUMBUF], *end;
+       char buffer[PROC_NUMBUF];
        struct mm_struct *mm;
        struct vm_area_struct *vma;
+       long type;
 
        memset(buffer, 0, sizeof(buffer));
        if (count > sizeof(buffer) - 1)
                count = sizeof(buffer) - 1;
        if (copy_from_user(buffer, buf, count))
                return -EFAULT;
-       if (!simple_strtol(buffer, &end, 0))
+       if (strict_strtol(strstrip(buffer), 10, &type))
+               return -EINVAL;
+       if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
                return -EINVAL;
-       if (*end == '\n')
-               end++;
        task = get_proc_task(file->f_path.dentry->d_inode);
        if (!task)
                return -ESRCH;
@@ -483,18 +517,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                down_read(&mm->mmap_sem);
                for (vma = mm->mmap; vma; vma = vma->vm_next) {
                        clear_refs_walk.private = vma;
-                       if (!is_vm_hugetlb_page(vma))
-                               walk_page_range(vma->vm_start, vma->vm_end,
-                                               &clear_refs_walk);
+                       if (is_vm_hugetlb_page(vma))
+                               continue;
+                       /*
+                        * Writing 1 to /proc/pid/clear_refs affects all pages.
+                        *
+                        * Writing 2 to /proc/pid/clear_refs only affects
+                        * Anonymous pages.
+                        *
+                        * Writing 3 to /proc/pid/clear_refs only affects file
+                        * mapped pages.
+                        */
+                       if (type == CLEAR_REFS_ANON && vma->vm_file)
+                               continue;
+                       if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+                               continue;
+                       walk_page_range(vma->vm_start, vma->vm_end,
+                                       &clear_refs_walk);
                }
                flush_tlb_mm(mm);
                up_read(&mm->mmap_sem);
                mmput(mm);
        }
        put_task_struct(task);
-       if (end - buffer == 0)
-               return -EIO;
-       return end - buffer;
+
+       return count;
 }
 
 const struct file_operations proc_clear_refs_operations = {
@@ -553,9 +600,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
        return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
 }
 
-static unsigned long pte_to_pagemap_entry(pte_t pte)
+static u64 pte_to_pagemap_entry(pte_t pte)
 {
-       unsigned long pme = 0;
+       u64 pme = 0;
        if (is_swap_pte(pte))
                pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
                        | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
@@ -602,6 +649,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        return err;
 }
 
+static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
+{ /* Build a pagemap entry from a huge pte; offset is added to the pte's pfn. */
+       u64 pme = 0; /* returned unchanged (0) when the pte is not present */
+       if (pte_present(pte)) /* entry = pfn + offset, PAGE_SHIFT as page shift, present flag */
+               pme = PM_PFRAME(pte_pfn(pte) + offset)
+                       | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+       return pme;
+}
+
+static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
+                                unsigned long end, struct mm_walk *walk)
+{ /* Installed as pagemap_walk.hugetlb_entry: adds one pagemap entry per PAGE_SIZE step in [addr, end). */
+       struct vm_area_struct *vma;
+       struct pagemapread *pm = walk->private; /* pagemap_read() stores &pm in walk.private */
+       struct hstate *hs = NULL; /* only dereferenced after a successful find_vma() has set it */
+       int err = 0;
+
+       vma = find_vma(walk->mm, addr);
+       if (vma)
+               hs = hstate_vma(vma);
+       for (; addr != end; addr += PAGE_SIZE) {
+               u64 pfn = PM_NOT_PRESENT; /* kept when addr is not inside a hugetlb vma */
+
+               if (vma && (addr >= vma->vm_end)) { /* stepped past the cached vma: look it up again */
+                       vma = find_vma(walk->mm, addr);
+                       if (vma)
+                               hs = hstate_vma(vma);
+               }
+
+               if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
+                       /* calculate pfn of the "raw" page in the hugepage. */
+                       int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
+                       pfn = huge_pte_to_pagemap_entry(*pte, offset); /* same *pte every step; only offset varies */
+               }
+               err = add_to_pagemap(addr, pfn, pm);
+               if (err) /* a non-zero result is handed straight back to the caller */
+                       return err;
+       }
+
+       cond_resched();
+
+       return err;
+}
+
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -636,7 +727,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        struct pagemapread pm;
        int pagecount;
        int ret = -ESRCH;
-       struct mm_walk pagemap_walk;
+       struct mm_walk pagemap_walk = {};
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
@@ -646,7 +737,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                goto out;
 
        ret = -EACCES;
-       if (!ptrace_may_attach(task))
+       if (!ptrace_may_access(task, PTRACE_MODE_READ))
                goto out_task;
 
        ret = -EINVAL;
@@ -655,6 +746,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                goto out_task;
 
        ret = 0;
+
+       if (!count)
+               goto out_task;
+
        mm = get_task_mm(task);
        if (!mm)
                goto out_task;
@@ -685,11 +780,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                goto out_pages;
        }
 
-       pm.out = (u64 *)buf;
-       pm.end = (u64 *)(buf + count);
+       pm.out = (u64 __user *)buf;
+       pm.end = (u64 __user *)(buf + count);
 
        pagemap_walk.pmd_entry = pagemap_pte_range;
        pagemap_walk.pte_hole = pagemap_pte_hole;
+       pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
        pagemap_walk.mm = mm;
        pagemap_walk.private = &pm;
 
@@ -712,9 +808,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        if (ret == PM_END_OF_BUFFER)
                ret = 0;
        /* don't need mmap_sem for these, but this looks cleaner */
-       *ppos += (char *)pm.out - buf;
+       *ppos += (char __user *)pm.out - buf;
        if (!ret)
-               ret = (char *)pm.out - buf;
+               ret = (char __user *)pm.out - buf;
 
 out_pages:
        for (; pagecount; pagecount--) {
@@ -742,22 +838,11 @@ const struct file_operations proc_pagemap_operations = {
 #ifdef CONFIG_NUMA
 extern int show_numa_map(struct seq_file *m, void *v);
 
-static int show_numa_map_checked(struct seq_file *m, void *v)
-{
-       struct proc_maps_private *priv = m->private;
-       struct task_struct *task = priv->task;
-
-       if (maps_protect && !ptrace_may_attach(task))
-               return -EACCES;
-
-       return show_numa_map(m, v);
-}
-
 static const struct seq_operations proc_pid_numa_maps_op = {
         .start  = m_start,
         .next   = m_next,
         .stop   = m_stop,
-        .show   = show_numa_map_checked
+        .show   = show_numa_map,
 };
 
 static int numa_maps_open(struct inode *inode, struct file *file)