diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3b5e904..9114974 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -8,34 +8,28 @@
  *  Copyright (C) 2006 Silicon Graphics, Inc.,
  *             Christoph Lameter <christoph@lameter.com>
  */
-
+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/vmstat.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
 EXPORT_PER_CPU_SYMBOL(vm_event_states);
 
-static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
 {
-       int cpu = 0;
+       int cpu;
        int i;
 
        memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
 
-       cpu = first_cpu(*cpumask);
-       while (cpu < NR_CPUS) {
+       for_each_cpu_mask_nr(cpu, *cpumask) {
                struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
 
-               cpu = next_cpu(cpu, *cpumask);
-
-               if (cpu < NR_CPUS)
-                       prefetch(&per_cpu(vm_event_states, cpu));
-
-
                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
                        ret[i] += this->event[i];
        }
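
The rewritten sum_vm_events() replaces the open-coded first_cpu()/next_cpu() walk, with its manual prefetch of the next CPU's state, by the for_each_cpu_mask_nr() iterator, which also stops at nr_cpu_ids instead of scanning all the way to NR_CPUS. A minimal sketch of the iterator on a hypothetical per-CPU counter ('my_counter' is illustrative, not part of this patch):

    static DEFINE_PER_CPU(unsigned long, my_counter);

    static unsigned long sum_my_counter(const struct cpumask *mask)
    {
            unsigned long sum = 0;
            int cpu;

            /* visits each set bit in *mask, up to nr_cpu_ids */
            for_each_cpu_mask_nr(cpu, *mask)
                    sum += per_cpu(my_counter, cpu);
            return sum;
    }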
@@ -48,7 +42,9 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
 */
 void all_vm_events(unsigned long *ret)
 {
-       sum_vm_events(ret, &cpu_online_map);
+       get_online_cpus();
+       sum_vm_events(ret, cpu_online_mask);
+       put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(all_vm_events);
 
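all_vm_events() now brackets the walk with get_online_cpus()/put_online_cpus(), so a CPU cannot be hot-unplugged (and vanish from the mask) mid-iteration, and it passes the const cpu_online_mask pointer rather than taking the address of cpu_online_map. The read-side pattern, as a sketch:

    get_online_cpus();                      /* hold off cpu_down() */
    sum_vm_events(ret, cpu_online_mask);    /* mask is stable here */
    put_online_cpus();                      /* allow hotplug again */
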
@@ -284,6 +280,10 @@ EXPORT_SYMBOL(dec_zone_page_state);
 /*
  * Update the zone counters for one cpu.
  *
+ * The cpu specified must be either the current cpu or a processor that
+ * is not online. If it is the current cpu then the execution thread must
+ * be pinned to the current cpu.
+ *
  * Note that refresh_cpu_vm_stats strives to only access
  * node local memory. The per cpu pagesets on remote zones are placed
  * in the memory local to the processor using that pageset. So the
@@ -299,7 +299,7 @@ void refresh_cpu_vm_stats(int cpu)
 {
        struct zone *zone;
        int i;
-       unsigned long flags;
+       int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 
        for_each_zone(zone) {
                struct per_cpu_pageset *p;
@@ -311,16 +311,21 @@ void refresh_cpu_vm_stats(int cpu)
 
                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (p->vm_stat_diff[i]) {
+                               unsigned long flags;
+                               int v;
+
                                local_irq_save(flags);
-                               zone_page_state_add(p->vm_stat_diff[i],
-                                       zone, i);
+                               v = p->vm_stat_diff[i];
                                p->vm_stat_diff[i] = 0;
+                               local_irq_restore(flags);
+                               atomic_long_add(v, &zone->vm_stat[i]);
+                               global_diff[i] += v;
 #ifdef CONFIG_NUMA
                                /* 3 seconds idle till flush */
                                p->expire = 3;
 #endif
-                               local_irq_restore(flags);
                        }
+               cond_resched();
 #ifdef CONFIG_NUMA
                /*
                 * Deal with draining the remote pageset of this
@@ -329,7 +334,7 @@ void refresh_cpu_vm_stats(int cpu)
                 * Check if there are pages remaining in this pageset
                 * if not then there is nothing to expire.
                 */
-               if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+               if (!p->expire || !p->pcp.count)
                        continue;
 
                /*
@@ -344,13 +349,14 @@ void refresh_cpu_vm_stats(int cpu)
                if (p->expire)
                        continue;
 
-               if (p->pcp[0].count)
-                       drain_zone_pages(zone, p->pcp + 0);
-
-               if (p->pcp[1].count)
-                       drain_zone_pages(zone, p->pcp + 1);
+               if (p->pcp.count)
+                       drain_zone_pages(zone, &p->pcp);
 #endif
        }
+
+       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+               if (global_diff[i])
+                       atomic_long_add(global_diff[i], &vm_stat[i]);
 }
 
 #endif
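
The refresh_cpu_vm_stats() rework shrinks the interrupts-off window to just the read-and-clear of each per-CPU delta; the zone counter and the (now batched) global counter are updated by atomic adds with interrupts enabled, and cond_resched() between zones bounds latency. The per-item fold, condensed ('p' is the per-CPU pageset and 'i' a stat index, as in the patch):

    unsigned long flags;
    int v;

    local_irq_save(flags);
    v = p->vm_stat_diff[i];                 /* snapshot the delta... */
    p->vm_stat_diff[i] = 0;                 /* ...and clear it */
    local_irq_restore(flags);

    atomic_long_add(v, &zone->vm_stat[i]);  /* fold into the zone */
    global_diff[i] += v;                    /* defer the global fold */

Accumulating into global_diff[] means vm_stat[] takes at most NR_VM_ZONE_STAT_ITEMS atomic adds per call instead of one per zone per item. The same hunks also reflect the pageset consolidation: the hot/cold pair p->pcp[0]/p->pcp[1] became a single p->pcp.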
@@ -362,13 +368,13 @@ void refresh_cpu_vm_stats(int cpu)
  *
  * Must be called with interrupts disabled.
  */
-void zone_statistics(struct zonelist *zonelist, struct zone *z)
+void zone_statistics(struct zone *preferred_zone, struct zone *z)
 {
-       if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
+       if (z->zone_pgdat == preferred_zone->zone_pgdat) {
                __inc_zone_state(z, NUMA_HIT);
        } else {
                __inc_zone_state(z, NUMA_MISS);
-               __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
+               __inc_zone_state(preferred_zone, NUMA_FOREIGN);
        }
        if (z->node == numa_node_id())
                __inc_zone_state(z, NUMA_LOCAL);
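
zone_statistics() now takes the preferred zone directly; after the two-zonelist rework the caller has already resolved it, so reaching through zonelist->zones[0] is no longer valid. A sketch of a call site after the change (names are illustrative of the page allocator's fast path):

    #ifdef CONFIG_NUMA
            /* 'zone' is where the page actually came from,
             * 'preferred_zone' is where the caller wanted it from */
            zone_statistics(preferred_zone, zone);
    #endif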
@@ -378,7 +384,7 @@ void zone_statistics(struct zonelist *zonelist, struct zone *z)
 #endif
 
 #ifdef CONFIG_PROC_FS
-
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
 static char * const migratetype_names[MIGRATE_TYPES] = {
@@ -386,6 +392,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
        "Reclaimable",
        "Movable",
        "Reserve",
+       "Isolate",
 };
 
 static void *frag_start(struct seq_file *m, loff_t *pos)
@@ -509,9 +516,26 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
                        continue;
 
                page = pfn_to_page(pfn);
+#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
+               /*
+                * Ordinarily, memory holes in flatmem still have a valid
+                * memmap for the PFN range. However, an architecture for
+                * embedded systems (e.g. ARM) can free up the memmap backing
+                * holes to save memory on the assumption the memmap is
+                * never used. The page_zone linkages are then broken even
+                * though pfn_valid() returns true. Skip the page if the
+                * linkages are broken. Even if this test passed, the impact
+                * is that the counters for the movable type are off but
+                * fragmentation monitoring is likely meaningless on small
+                * systems.
+                */
+               if (page_zone(page) != zone)
+                       continue;
+#endif
                mtype = get_pageblock_migratetype(page);
 
-               count[mtype]++;
+               if (mtype < MIGRATE_TYPES)
+                       count[mtype]++;
        }
 
        /* Print counts */
@@ -544,6 +568,10 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
 {
        pg_data_t *pgdat = (pg_data_t *)arg;
 
+       /* check memoryless node */
+       if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
+               return 0;
+
        seq_printf(m, "Page block order: %d\n", pageblock_order);
        seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
        seq_putc(m, '\n');
@@ -553,20 +581,44 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
        return 0;
 }
 
-const struct seq_operations fragmentation_op = {
+static const struct seq_operations fragmentation_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = frag_show,
 };
 
-const struct seq_operations pagetypeinfo_op = {
+static int fragmentation_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &fragmentation_op);
+}
+
+static const struct file_operations fragmentation_file_operations = {
+       .open           = fragmentation_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+static const struct seq_operations pagetypeinfo_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = pagetypeinfo_show,
 };
 
+static int pagetypeinfo_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &pagetypeinfo_op);
+}
+
+static const struct file_operations pagetypeinfo_file_ops = {
+       .open           = pagetypeinfo_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
 #ifdef CONFIG_ZONE_DMA
 #define TEXT_FOR_DMA(xx) xx "_dma",
 #else
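
Each formerly exported seq_operations table becomes static and gains a thin open() wrapper plus a file_operations, so the files can be registered with proc_create() below instead of being wired up from fs/proc/proc_misc.c. The conversion pattern, sketched for a hypothetical 'fooinfo' file (foo_op stands for a seq_operations table like the ones above):

    static int fooinfo_open(struct inode *inode, struct file *file)
    {
            return seq_open(file, &foo_op);
    }

    static const struct file_operations fooinfo_file_ops = {
            .open           = fooinfo_open,
            .read           = seq_read,
            .llseek         = seq_lseek,
            .release        = seq_release,
    };

    /* registered from an init function with:
     *      proc_create("fooinfo", S_IRUGO, NULL, &fooinfo_file_ops);
     */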
@@ -591,8 +643,14 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
        /* Zoned VM counters */
        "nr_free_pages",
-       "nr_inactive",
-       "nr_active",
+       "nr_inactive_anon",
+       "nr_active_anon",
+       "nr_inactive_file",
+       "nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+       "nr_unevictable",
+       "nr_mlock",
+#endif
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
@@ -604,6 +662,7 @@ static const char * const vmstat_text[] = {
        "nr_unstable",
        "nr_bounce",
        "nr_vmscan_write",
+       "nr_writeback_temp",
 
 #ifdef CONFIG_NUMA
        "numa_hit",
@@ -642,6 +701,20 @@ static const char * const vmstat_text[] = {
        "allocstall",
 
        "pgrotated",
+#ifdef CONFIG_HUGETLB_PAGE
+       "htlb_buddy_alloc_success",
+       "htlb_buddy_alloc_fail",
+#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+       "unevictable_pgs_culled",
+       "unevictable_pgs_scanned",
+       "unevictable_pgs_rescued",
+       "unevictable_pgs_mlocked",
+       "unevictable_pgs_munlocked",
+       "unevictable_pgs_cleared",
+       "unevictable_pgs_stranded",
+       "unevictable_pgs_mlockfreed",
+#endif
 #endif
 };
 
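The vmstat_text additions track enum changes made elsewhere in this series (the split anon/file LRU lists, CONFIG_UNEVICTABLE_LRU, nr_writeback_temp, the hugetlb buddy-allocator events). The order is load-bearing: /proc/vmstat prints vmstat_text[i] next to value i of an array built from the zone counters followed by the event counters, roughly as vmstat_start() does:

    unsigned long *v;
    int i;

    /* zone counters first, then the vm event counters appended */
    v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
                    + sizeof(struct vm_event_state), GFP_KERNEL);
    for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
            v[i] = global_page_state(i);
    all_vm_events(v + NR_VM_ZONE_STAT_ITEMS);

so any new enum entry needs a matching string in the same position.
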
@@ -655,7 +728,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n        min      %lu"
                   "\n        low      %lu"
                   "\n        high     %lu"
-                  "\n        scanned  %lu (a: %lu i: %lu)"
+                  "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
                   "\n        spanned  %lu"
                   "\n        present  %lu",
                   zone_page_state(zone, NR_FREE_PAGES),
@@ -663,7 +736,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   zone->pages_low,
                   zone->pages_high,
                   zone->pages_scanned,
-                  zone->nr_scan_active, zone->nr_scan_inactive,
+                  zone->lru[LRU_ACTIVE_ANON].nr_scan,
+                  zone->lru[LRU_INACTIVE_ANON].nr_scan,
+                  zone->lru[LRU_ACTIVE_FILE].nr_scan,
+                  zone->lru[LRU_INACTIVE_FILE].nr_scan,
                   zone->spanned_pages,
                   zone->present_pages);
 
@@ -681,20 +757,17 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n  pagesets");
        for_each_online_cpu(i) {
                struct per_cpu_pageset *pageset;
-               int j;
 
                pageset = zone_pcp(zone, i);
-               for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-                       seq_printf(m,
-                                  "\n    cpu: %i pcp: %i"
-                                  "\n              count: %i"
-                                  "\n              high:  %i"
-                                  "\n              batch: %i",
-                                  i, j,
-                                  pageset->pcp[j].count,
-                                  pageset->pcp[j].high,
-                                  pageset->pcp[j].batch);
-                       }
+               seq_printf(m,
+                          "\n    cpu: %i"
+                          "\n              count: %i"
+                          "\n              high:  %i"
+                          "\n              batch: %i",
+                          i,
+                          pageset->pcp.count,
+                          pageset->pcp.high,
+                          pageset->pcp.batch);
 #ifdef CONFIG_SMP
                seq_printf(m, "\n  vm stats threshold: %d",
                                pageset->stat_threshold);
@@ -703,10 +776,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
        seq_printf(m,
                   "\n  all_unreclaimable: %u"
                   "\n  prev_priority:     %i"
-                  "\n  start_pfn:         %lu",
-                  zone->all_unreclaimable,
+                  "\n  start_pfn:         %lu"
+                  "\n  inactive_ratio:    %u",
+                          zone_is_all_unreclaimable(zone),
                   zone->prev_priority,
-                  zone->zone_start_pfn);
+                  zone->zone_start_pfn,
+                  zone->inactive_ratio);
        seq_putc(m, '\n');
 }
 
@@ -720,7 +795,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
        return 0;
 }
 
-const struct seq_operations zoneinfo_op = {
+static const struct seq_operations zoneinfo_op = {
        .start  = frag_start, /* iterate over all zones. The same as in
                               * fragmentation. */
        .next   = frag_next,
@@ -728,6 +803,18 @@ const struct seq_operations zoneinfo_op = {
        .show   = zoneinfo_show,
 };
 
+static int zoneinfo_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &zoneinfo_op);
+}
+
+static const struct file_operations proc_zoneinfo_file_operations = {
+       .open           = zoneinfo_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
        unsigned long *v;
@@ -783,13 +870,24 @@ static void vmstat_stop(struct seq_file *m, void *arg)
        m->private = NULL;
 }
 
-const struct seq_operations vmstat_op = {
+static const struct seq_operations vmstat_op = {
        .start  = vmstat_start,
        .next   = vmstat_next,
        .stop   = vmstat_stop,
        .show   = vmstat_show,
 };
 
+static int vmstat_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &vmstat_op);
+}
+
+static const struct file_operations proc_vmstat_file_operations = {
+       .open           = vmstat_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
@@ -803,7 +901,7 @@ static void vmstat_update(struct work_struct *w)
                sysctl_stat_interval);
 }
 
-static void __devinit start_cpu_timer(int cpu)
+static void __cpuinit start_cpu_timer(int cpu)
 {
        struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
 
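start_cpu_timer() is called from the __cpuinit hotplug callback as well as at boot, so __cpuinit (tied to CONFIG_HOTPLUG_CPU) is the matching annotation; __devinit keys off the wrong config option and triggers section-mismatch warnings. Each CPU runs a self-rearming deferrable work that folds its counters via refresh_cpu_vm_stats(). The pattern, sketched with illustrative names (my_work/my_update; the real code uses sysctl_stat_interval as the period):

    static DEFINE_PER_CPU(struct delayed_work, my_work);

    static void my_update(struct work_struct *w)
    {
            refresh_cpu_vm_stats(smp_processor_id());
            /* re-arm on this CPU, roughly once per second */
            schedule_delayed_work(&__get_cpu_var(my_work), HZ);
    }

    static void __cpuinit my_start_timer(int cpu)
    {
            struct delayed_work *work = &per_cpu(my_work, cpu);

            INIT_DELAYED_WORK_DEFERRABLE(work, my_update);
            /* stagger the CPUs so they do not all fire together */
            schedule_delayed_work_on(cpu, work, HZ + cpu);
    }
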
@@ -847,9 +945,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 
 static struct notifier_block __cpuinitdata vmstat_notifier =
        { &vmstat_cpuup_callback, NULL, 0 };
+#endif
 
 static int __init setup_vmstat(void)
 {
+#ifdef CONFIG_SMP
        int cpu;
 
        refresh_zone_stat_thresholds();
@@ -857,7 +957,13 @@ static int __init setup_vmstat(void)
 
        for_each_online_cpu(cpu)
                start_cpu_timer(cpu);
+#endif
+#ifdef CONFIG_PROC_FS
+       proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
+       proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
+       proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
+       proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
+#endif
        return 0;
 }
 module_init(setup_vmstat)
-#endif
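
With the file_operations in place, setup_vmstat() registers all four files itself via proc_create(); module_init() now runs it even on !CONFIG_SMP builds, which is why the trailing #endif is removed and CONFIG_SMP guards only the per-CPU timer setup. Nothing changes for userspace; a trivial reader of one of the registered files, as a sketch:

    /* userspace sketch: dump /proc/vmstat (the other three files
     * registered above are read the same way) */
    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/vmstat", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* "name value" per line */
            fclose(f);
            return 0;
    }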