#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/pagemap.h>
-#include <linux/ptrace.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/swapops.h>
-#include <linux/seq_file.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
return mm->total_vm;
}
-int proc_exe_link(struct inode *inode, struct path *path)
-{
- struct vm_area_struct * vma;
- int result = -ENOENT;
- struct task_struct *task = get_proc_task(inode);
- struct mm_struct * mm = NULL;
-
- if (task) {
- mm = get_task_mm(task);
- put_task_struct(task);
- }
- if (!mm)
- goto out;
- down_read(&mm->mmap_sem);
-
- vma = mm->mmap;
- while (vma) {
- if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
- break;
- vma = vma->vm_next;
- }
-
- if (vma) {
- *path = vma->vm_file->f_path;
- path_get(&vma->vm_file->f_path);
- result = 0;
- }
-
- up_read(&mm->mmap_sem);
- mmput(mm);
-out:
- return result;
-}
-
static void pad_len_spaces(struct seq_file *m, int len)
{
len = 25 + sizeof(void*) * 6 - len;
return ret;
}
-static int show_map(struct seq_file *m, void *v)
+static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
- struct proc_maps_private *priv = m->private;
- struct task_struct *task = priv->task;
- struct vm_area_struct *vma = v;
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
int flags = vma->vm_flags;
dev_t dev = 0;
int len;
- if (maps_protect && !ptrace_may_attach(task))
- return -EACCES;
-
if (file) {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
}
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p',
- vma->vm_pgoff << PAGE_SHIFT,
+ ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
/*
}
}
seq_putc(m, '\n');
+}
+
+static int show_map(struct seq_file *m, void *v)
+{
+ struct vm_area_struct *vma = v;
+ struct proc_maps_private *priv = m->private;
+ struct task_struct *task = priv->task;
+
+ show_map_vma(m, vma);
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
#define PSS_SHIFT 12
#ifdef CONFIG_PROC_PAGE_MONITOR
-struct mem_size_stats
-{
+struct mem_size_stats {
struct vm_area_struct *vma;
unsigned long resident;
unsigned long shared_clean;
unsigned long private_clean;
unsigned long private_dirty;
unsigned long referenced;
+ unsigned long swap;
u64 pss;
};
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- void *private)
+ struct mm_walk *walk)
{
- struct mem_size_stats *mss = private;
+ struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = mss->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
+
+ if (is_swap_pte(ptent)) {
+ mss->swap += PAGE_SIZE;
+ continue;
+ }
+
if (!pte_present(ptent))
continue;
return 0;
}
-static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
-
static int show_smap(struct seq_file *m, void *v)
{
+ struct proc_maps_private *priv = m->private;
+ struct task_struct *task = priv->task;
struct vm_area_struct *vma = v;
struct mem_size_stats mss;
- int ret;
+ struct mm_walk smaps_walk = {
+ .pmd_entry = smaps_pte_range,
+ .mm = vma->vm_mm,
+ .private = &mss,
+ };
memset(&mss, 0, sizeof mss);
mss.vma = vma;
if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
- &smaps_walk, &mss);
+ walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
- ret = show_map(m, v);
- if (ret)
- return ret;
+ show_map_vma(m, vma);
seq_printf(m,
"Size: %8lu kB\n"
"Shared_Dirty: %8lu kB\n"
"Private_Clean: %8lu kB\n"
"Private_Dirty: %8lu kB\n"
- "Referenced: %8lu kB\n",
+ "Referenced: %8lu kB\n"
+ "Swap: %8lu kB\n",
(vma->vm_end - vma->vm_start) >> 10,
mss.resident >> 10,
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
mss.shared_dirty >> 10,
mss.private_clean >> 10,
mss.private_dirty >> 10,
- mss.referenced >> 10);
+ mss.referenced >> 10,
+ mss.swap >> 10);
- return ret;
+ if (m->count < m->size) /* vma is copied successfully */
+ m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+ return 0;
}
static const struct seq_operations proc_pid_smaps_op = {
};
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, void *private)
+ unsigned long end, struct mm_walk *walk)
{
- struct vm_area_struct *vma = private;
+ struct vm_area_struct *vma = walk->private;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
return 0;
}
-static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
-
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ struct mm_walk clear_refs_walk = {
+ .pmd_entry = clear_refs_pte_range,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ clear_refs_walk.private = vma;
if (!is_vm_hugetlb_page(vma))
- walk_page_range(mm, vma->vm_start, vma->vm_end,
- &clear_refs_walk, vma);
+ walk_page_range(vma->vm_start, vma->vm_end,
+ &clear_refs_walk);
+ }
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
mmput(mm);
};
struct pagemapread {
- char __user *out, *end;
+ u64 __user *out, *end;
};
-#define PM_ENTRY_BYTES sizeof(u64)
-#define PM_RESERVED_BITS 3
-#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
-#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
-#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
-#define PM_NOT_PRESENT PM_SPECIAL(1LL)
-#define PM_SWAP PM_SPECIAL(2LL)
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_STATUS_BITS 3
+#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS 6
+#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT PM_STATUS(4LL)
+#define PM_SWAP PM_STATUS(2LL)
+#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
#define PM_END_OF_BUFFER 1
static int add_to_pagemap(unsigned long addr, u64 pfn,
struct pagemapread *pm)
{
- /*
- * Make sure there's room in the buffer for an
- * entire entry. Otherwise, only copy part of
- * the pfn.
- */
- if (pm->out + PM_ENTRY_BYTES >= pm->end) {
- if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
- return -EFAULT;
- pm->out = pm->end;
- return PM_END_OF_BUFFER;
- }
-
if (put_user(pfn, pm->out))
return -EFAULT;
- pm->out += PM_ENTRY_BYTES;
+ pm->out++;
+ if (pm->out >= pm->end)
+ return PM_END_OF_BUFFER;
return 0;
}
static int pagemap_pte_hole(unsigned long start, unsigned long end,
- void *private)
+ struct mm_walk *walk)
{
- struct pagemapread *pm = private;
+ struct pagemapread *pm = walk->private;
unsigned long addr;
int err = 0;
for (addr = start; addr < end; addr += PAGE_SIZE) {
return err;
}
-u64 swap_pte_to_pagemap_entry(pte_t pte)
+static u64 swap_pte_to_pagemap_entry(pte_t pte)
{
swp_entry_t e = pte_to_swp_entry(pte);
- return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
+ return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
+}
+
+static unsigned long pte_to_pagemap_entry(pte_t pte)
+{
+ unsigned long pme = 0;
+ if (is_swap_pte(pte))
+ pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
+ | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+ else if (pte_present(pte))
+ pme = PM_PFRAME(pte_pfn(pte))
+ | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+ return pme;
}
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- void *private)
+ struct mm_walk *walk)
{
- struct pagemapread *pm = private;
+ struct vm_area_struct *vma;
+ struct pagemapread *pm = walk->private;
pte_t *pte;
int err = 0;
+ /* find the first VMA at or above 'addr' */
+ vma = find_vma(walk->mm, addr);
for (; addr != end; addr += PAGE_SIZE) {
u64 pfn = PM_NOT_PRESENT;
- pte = pte_offset_map(pmd, addr);
- if (is_swap_pte(*pte))
- pfn = swap_pte_to_pagemap_entry(*pte);
- else if (pte_present(*pte))
- pfn = pte_pfn(*pte);
- /* unmap so we're not in atomic when we copy to userspace */
- pte_unmap(pte);
+
+ /* check to see if we've left 'vma' behind
+ * and need a new, higher one */
+ if (vma && (addr >= vma->vm_end))
+ vma = find_vma(walk->mm, addr);
+
+ /* check that 'vma' actually covers this address,
+ * and that it isn't a huge page vma */
+ if (vma && (vma->vm_start <= addr) &&
+ !is_vm_hugetlb_page(vma)) {
+ pte = pte_offset_map(pmd, addr);
+ pfn = pte_to_pagemap_entry(*pte);
+ /* unmap before userspace copy */
+ pte_unmap(pte);
+ }
err = add_to_pagemap(addr, pfn, pm);
if (err)
return err;
return err;
}
-static struct mm_walk pagemap_walk = {
- .pmd_entry = pagemap_pte_range,
- .pte_hole = pagemap_pte_hole
-};
-
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
- * For each page in the address space, this file contains one 64-bit
- * entry representing the corresponding physical page frame number
- * (PFN) if the page is present. If there is a swap entry for the
- * physical page, then an encoding of the swap file number and the
- * page's offset into the swap file are returned. If no page is
- * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * For each page in the address space, this file contains one 64-bit entry
+ * consisting of the following:
+ *
+ * Bits 0-55 page frame number (PFN) if present
+ * Bits 0-4 swap type if swapped
+ * Bits 5-55 swap offset if swapped
+ * Bits 55-60 page shift (page size = 1<<page shift)
+ * Bit 61 reserved for future use
+ * Bit 62 page swapped
+ * Bit 63 page present
+ *
+ * If the page is not present but in swap, then the PFN contains an
+ * encoding of the swap file number and the page's offset into the
+ * swap. Unmapped pages return a null PFN. This allows determining
* precisely which pages are mapped (or in swap) and comparing mapped
* pages between processes.
*
struct pagemapread pm;
int pagecount;
int ret = -ESRCH;
+ struct mm_walk pagemap_walk = {};
+ unsigned long src;
+ unsigned long svpfn;
+ unsigned long start_vaddr;
+ unsigned long end_vaddr;
if (!task)
goto out;
ret = -EACCES;
- if (!ptrace_may_attach(task))
- goto out;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out_task;
ret = -EINVAL;
/* file position must be aligned */
- if (*ppos % PM_ENTRY_BYTES)
- goto out;
+ if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
+ goto out_task;
ret = 0;
mm = get_task_mm(task);
if (!mm)
- goto out;
+ goto out_task;
+
- ret = -ENOMEM;
uaddr = (unsigned long)buf & PAGE_MASK;
uend = (unsigned long)(buf + count);
pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
- pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ ret = 0;
+ if (pagecount == 0)
+ goto out_mm;
+ pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ ret = -ENOMEM;
if (!pages)
- goto out_task;
+ goto out_mm;
down_read(¤t->mm->mmap_sem);
ret = get_user_pages(current, current->mm, uaddr, pagecount,
if (ret < 0)
goto out_free;
- pm.out = buf;
- pm.end = buf + count;
-
- if (!ptrace_may_attach(task)) {
- ret = -EIO;
- } else {
- unsigned long src = *ppos;
- unsigned long svpfn = src / PM_ENTRY_BYTES;
- unsigned long start_vaddr = svpfn << PAGE_SHIFT;
- unsigned long end_vaddr = TASK_SIZE_OF(task);
-
- /* watch out for wraparound */
- if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
- start_vaddr = end_vaddr;
-
- /*
- * The odds are that this will stop walking way
- * before end_vaddr, because the length of the
- * user buffer is tracked in "pm", and the walk
- * will stop when we hit the end of the buffer.
- */
- ret = walk_page_range(mm, start_vaddr, end_vaddr,
- &pagemap_walk, &pm);
- if (ret == PM_END_OF_BUFFER)
- ret = 0;
- /* don't need mmap_sem for these, but this looks cleaner */
- *ppos += pm.out - buf;
- if (!ret)
- ret = pm.out - buf;
+ if (ret != pagecount) {
+ pagecount = ret;
+ ret = -EFAULT;
+ goto out_pages;
}
+ pm.out = (u64 *)buf;
+ pm.end = (u64 *)(buf + count);
+
+ pagemap_walk.pmd_entry = pagemap_pte_range;
+ pagemap_walk.pte_hole = pagemap_pte_hole;
+ pagemap_walk.mm = mm;
+ pagemap_walk.private = ±
+
+ src = *ppos;
+ svpfn = src / PM_ENTRY_BYTES;
+ start_vaddr = svpfn << PAGE_SHIFT;
+ end_vaddr = TASK_SIZE_OF(task);
+
+ /* watch out for wraparound */
+ if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
+ if (ret == PM_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos += (char *)pm.out - buf;
+ if (!ret)
+ ret = (char *)pm.out - buf;
+
+out_pages:
for (; pagecount; pagecount--) {
page = pages[pagecount-1];
if (!PageReserved(page))
SetPageDirty(page);
page_cache_release(page);
}
- mmput(mm);
out_free:
kfree(pages);
+out_mm:
+ mmput(mm);
out_task:
put_task_struct(task);
out:
#ifdef CONFIG_NUMA
extern int show_numa_map(struct seq_file *m, void *v);
-static int show_numa_map_checked(struct seq_file *m, void *v)
-{
- struct proc_maps_private *priv = m->private;
- struct task_struct *task = priv->task;
-
- if (maps_protect && !ptrace_may_attach(task))
- return -EACCES;
-
- return show_numa_map(m, v);
-}
-
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_numa_map_checked
+ .show = show_numa_map,
};
static int numa_maps_open(struct inode *inode, struct file *file)