Merge branch 'master' into percpu

author Tejun Heo <tj@kernel.org>
Tue, 2 Feb 2010 05:38:15 +0000 (14:38 +0900)
committer Tejun Heo <tj@kernel.org>
Tue, 2 Feb 2010 05:38:15 +0000 (14:38 +0900)
diff --combined arch/sparc/kernel/nmi.c

index 9b9f5b4,d242a73..b287b62
--- 1/arch/sparc/kernel/nmi.c
--- 2/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@@ -21,6 -21,7 +21,6 @@@
   
   #include <asm/perf_event.h>
   #include <asm/ptrace.h>
- -#include <asm/local.h>
   #include <asm/pcr.h>
   
   /* We don't have a real NMI on sparc64, but we can fake one
@@@ -95,7 -96,6 +95,6 @@@ notrace __kprobes void perfctr_irq(int 
         int cpu = smp_processor_id();
   
         clear_softint(1 << irq);
-       pcr_ops->write(PCR_PIC_PRIV);
   
         local_cpu_data().__nmi_count++;
   
@@@ -104,6 -104,8 +103,8 @@@
         if (notify_die(DIE_NMI, "nmi", regs, 0,
                        pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
                 touched = 1;
+       else
+               pcr_ops->write(PCR_PIC_PRIV);
   
         sum = kstat_irqs_cpu(0, cpu);
         if (__get_cpu_var(nmi_touch)) {
@@@ -111,13 -113,13 +112,13 @@@
                 touched = 1;
         }
         if (!touched && __get_cpu_var(last_irq_sum) == sum) {
- -              __this_cpu_inc(per_cpu_var(alert_counter));
- -              if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz)
+ +              __this_cpu_inc(alert_counter);
+ +              if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
                         die_nmi("BUG: NMI Watchdog detected LOCKUP",
                                 regs, panic_on_timeout);
         } else {
                 __get_cpu_var(last_irq_sum) = sum;
- -              __this_cpu_write(per_cpu_var(alert_counter), 0);
+ +              __this_cpu_write(alert_counter, 0);
         }
         if (__get_cpu_var(wd_enabled)) {
                 write_pic(picl_value(nmi_hz));
diff --combined include/linux/mm.h

index 554fa39,60c467b..91d2ba1
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -1079,12 -1079,17 +1079,13 @@@ extern void si_meminfo(struct sysinfo 
   extern void si_meminfo_node(struct sysinfo *val, int nid);
   extern int after_bootmem;
   
- -#ifdef CONFIG_NUMA
   extern void setup_per_cpu_pageset(void);
- -#else
- -static inline void setup_per_cpu_pageset(void) {}
- -#endif
   
   extern void zone_pcp_update(struct zone *zone);
   
   /* nommu.c */
   extern atomic_long_t mmap_pages_allocated;
+ extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
   
   /* prio_tree.c */
   void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff --combined kernel/module.c

index 9bf2280,f82386b..e5538d5
--- 1/kernel/module.c
--- 2/kernel/module.c
+++ b/kernel/module.c
@@@ -474,10 -474,9 +474,10 @@@ static void module_unload_init(struct m
   
         INIT_LIST_HEAD(&mod->modules_which_use_me);
         for_each_possible_cpu(cpu)
- -              local_set(__module_ref_addr(mod, cpu), 0);
+ +              per_cpu_ptr(mod->refptr, cpu)->count = 0;
+ +
         /* Hold reference count during initialization. */
- -      local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
+ +      __this_cpu_write(mod->refptr->count, 1);
         /* Backwards compatibility macros put refcount during init. */
         mod->waiter = current;
   }
@@@ -620,7 -619,7 +620,7 @@@ unsigned int module_refcount(struct mod
         int cpu;
   
         for_each_possible_cpu(cpu)
- -              total += local_read(__module_ref_addr(mod, cpu));
+ +              total += per_cpu_ptr(mod->refptr, cpu)->count;
         return total;
   }
   EXPORT_SYMBOL(module_refcount);
@@@ -797,15 -796,14 +797,15 @@@ static struct module_attribute refcnt 
   void module_put(struct module *module)
   {
         if (module) {
- -              unsigned int cpu = get_cpu();
- -              local_dec(__module_ref_addr(module, cpu));
+ +              preempt_disable();
+ +              __this_cpu_dec(module->refptr->count);
+ +
                 trace_module_put(module, _RET_IP_,
- -                               local_read(__module_ref_addr(module, cpu)));
+ +                               __this_cpu_read(module->refptr->count));
                 /* Maybe they're waiting for us to drop reference? */
                 if (unlikely(!module_is_live(module)))
                         wake_up_process(module->waiter);
- -              put_cpu();
+ +              preempt_enable();
         }
   }
   EXPORT_SYMBOL(module_put);
@@@ -1012,6 -1010,12 +1012,12 @@@ static const struct kernel_symbol *reso
    * J. Corbet <corbet@lwn.net>
    */
   #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
+ 
+ static inline bool sect_empty(const Elf_Shdr *sect)
+ {
+       return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
+ }
+ 
   struct module_sect_attr
   {
         struct module_attribute mattr;
@@@ -1053,8 -1057,7 +1059,7 @@@ static void add_sect_attrs(struct modul
   
         /* Count loaded sections and allocate structures */
         for (i = 0; i < nsect; i++)
-               if (sechdrs[i].sh_flags & SHF_ALLOC
-                   && sechdrs[i].sh_size)
+               if (!sect_empty(&sechdrs[i]))
                         nloaded++;
         size[0] = ALIGN(sizeof(*sect_attrs)
                         + nloaded * sizeof(sect_attrs->attrs[0]),
@@@ -1072,9 -1075,7 +1077,7 @@@
         sattr = &sect_attrs->attrs[0];
         gattr = &sect_attrs->grp.attrs[0];
         for (i = 0; i < nsect; i++) {
-               if (! (sechdrs[i].sh_flags & SHF_ALLOC))
-                       continue;
-               if (!sechdrs[i].sh_size)
+               if (sect_empty(&sechdrs[i]))
                         continue;
                 sattr->address = sechdrs[i].sh_addr;
                 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@@ -1158,7 -1159,7 +1161,7 @@@ static void add_notes_attrs(struct modu
         /* Count notes sections and allocate structures.  */
         notes = 0;
         for (i = 0; i < nsect; i++)
-               if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
+               if (!sect_empty(&sechdrs[i]) &&
                     (sechdrs[i].sh_type == SHT_NOTE))
                         ++notes;
   
@@@ -1174,7 -1175,7 +1177,7 @@@
         notes_attrs->notes = notes;
         nattr = &notes_attrs->attrs[0];
         for (loaded = i = 0; i < nsect; ++i) {
-               if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+               if (sect_empty(&sechdrs[i]))
                         continue;
                 if (sechdrs[i].sh_type == SHT_NOTE) {
                         nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
@@@ -1396,9 -1397,9 +1399,9 @@@ static void free_module(struct module *
         kfree(mod->args);
         if (mod->percpu)
                 percpu_modfree(mod->percpu);
- -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ +#if defined(CONFIG_MODULE_UNLOAD)
         if (mod->refptr)
- -              percpu_modfree(mod->refptr);
+ +              free_percpu(mod->refptr);
   #endif
         /* Free lock-classes: */
         lockdep_free_key_range(mod->module_core, mod->core_size);
@@@ -2161,8 -2162,9 +2164,8 @@@ static noinline struct module *load_mod
         mod = (void *)sechdrs[modindex].sh_addr;
         kmemleak_load_module(mod, hdr, sechdrs, secstrings);
   
- -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- -      mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
- -                                    mod->name);
+ +#if defined(CONFIG_MODULE_UNLOAD)
+ +      mod->refptr = alloc_percpu(struct module_ref);
         if (!mod->refptr) {
                 err = -ENOMEM;
                 goto free_init;
@@@ -2394,8 -2396,8 +2397,8 @@@
         kobject_put(&mod->mkobj.kobj);
    free_unload:
         module_unload_free(mod);
- -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- -      percpu_modfree(mod->refptr);
+ +#if defined(CONFIG_MODULE_UNLOAD)
+ +      free_percpu(mod->refptr);
    free_init:
   #endif
         module_free(mod, mod->module_init);
diff --combined kernel/trace/ring_buffer.c

index eb6c898,8c1b2d2..0287f9f
--- 1/kernel/trace/ring_buffer.c
--- 2/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@@ -20,7 -20,6 +20,7 @@@
   #include <linux/cpu.h>
   #include <linux/fs.h>
   
+ +#include <asm/local.h>
   #include "trace.h"
   
   /*
@@@ -465,6 -464,8 +465,8 @@@ struct ring_buffer_iter 
         struct ring_buffer_per_cpu      *cpu_buffer;
         unsigned long                   head;
         struct buffer_page              *head_page;
+       struct buffer_page              *cache_reader_page;
+       unsigned long                   cache_read;
         u64                             read_stamp;
   };
   
@@@ -2717,6 -2718,8 +2719,8 @@@ static void rb_iter_reset(struct ring_b
                 iter->read_stamp = cpu_buffer->read_stamp;
         else
                 iter->read_stamp = iter->head_page->page->time_stamp;
+       iter->cache_reader_page = cpu_buffer->reader_page;
+       iter->cache_read = cpu_buffer->read;
   }
   
   /**
@@@ -2870,7 -2873,7 +2874,7 @@@ rb_get_reader_page(struct ring_buffer_p
          * Splice the empty reader page into the list around the head.
          */
         reader = rb_set_head_page(cpu_buffer);
-       cpu_buffer->reader_page->list.next = reader->list.next;
+       cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
         cpu_buffer->reader_page->list.prev = reader->list.prev;
   
         /*
@@@ -2907,7 -2910,7 +2911,7 @@@
          *
          * Now make the new head point back to the reader page.
          */
-       reader->list.next->prev = &cpu_buffer->reader_page->list;
+       rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
         rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
   
         /* Finally update the reader page to the new head */
@@@ -3061,13 -3064,22 +3065,22 @@@ rb_iter_peek(struct ring_buffer_iter *i
         struct ring_buffer_event *event;
         int nr_loops = 0;
   
         cpu_buffer = iter->cpu_buffer;
         buffer = cpu_buffer->buffer;
   
+       /*
+        * Check if someone performed a consuming read to
+        * the buffer. A consuming read invalidates the iterator
+        * and we need to reset the iterator in this case.
+        */
+       if (unlikely(iter->cache_read != cpu_buffer->read ||
+                    iter->cache_reader_page != cpu_buffer->reader_page))
+               rb_iter_reset(iter);
+ 
    again:
+       if (ring_buffer_iter_empty(iter))
+               return NULL;
+ 
         /*
          * We repeat when a timestamp is encountered.
          * We can get multiple timestamps by nested interrupts or also
@@@ -3082,6 -3094,11 +3095,11 @@@
         if (rb_per_cpu_empty(cpu_buffer))
                 return NULL;
   
+       if (iter->head >= local_read(&iter->head_page->page->commit)) {
+               rb_inc_iter(iter);
+               goto again;
+       }
+ 
         event = rb_iter_head_event(iter);
   
         switch (event->type_len) {
diff --combined kernel/trace/trace.c

index ab2bbb0,eac6875..667ba80
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -91,12 -91,12 +91,12 @@@ DEFINE_PER_CPU(int, ftrace_cpu_disabled
   static inline void ftrace_disable_cpu(void)
   {
         preempt_disable();
- -      __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+ +      __this_cpu_inc(ftrace_cpu_disabled);
   }
   
   static inline void ftrace_enable_cpu(void)
   {
- -      __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+ +      __this_cpu_dec(ftrace_cpu_disabled);
         preempt_enable();
   }
   
@@@ -951,6 -951,11 +951,11 @@@ void trace_find_cmdline(int pid, char c
                 return;
         }
   
+       if (WARN_ON_ONCE(pid < 0)) {
+               strcpy(comm, "<XXX>");
+               return;
+       }
+ 
         if (pid > PID_MAX_DEFAULT) {
                 strcpy(comm, "<...>");
                 return;
@@@ -1084,7 -1089,7 +1089,7 @@@ trace_function(struct trace_array *tr
         struct ftrace_entry *entry;
   
         /* If we are reading the ring buffer, don't trace */
- -      if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+ +      if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                 return;
   
         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
diff --combined mm/page_alloc.c

index 6849e87,8deb9d0..9a7aaae
--- 1/mm/page_alloc.c
--- 2/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@@ -556,8 -556,9 +556,9 @@@ static void free_pcppages_bulk(struct z
                         page = list_entry(list->prev, struct page, lru);
                         /* must delete as __free_one_page list manipulates */
                         list_del(&page->lru);
-                       __free_one_page(page, zone, 0, migratetype);
-                       trace_mm_page_pcpu_drain(page, 0, migratetype);
+                       /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+                       __free_one_page(page, zone, 0, page_private(page));
+                       trace_mm_page_pcpu_drain(page, 0, page_private(page));
                 } while (--count && --batch_free && !list_empty(list));
         }
         spin_unlock(&zone->lock);
@@@ -1008,10 -1009,10 +1009,10 @@@ static void drain_pages(unsigned int cp
                 struct per_cpu_pageset *pset;
                 struct per_cpu_pages *pcp;
   
- -              pset = zone_pcp(zone, cpu);
+ +              local_irq_save(flags);
+ +              pset = per_cpu_ptr(zone->pageset, cpu);
   
                 pcp = &pset->pcp;
- -              local_irq_save(flags);
                 free_pcppages_bulk(zone, pcp->count, pcp);
                 pcp->count = 0;
                 local_irq_restore(flags);
@@@ -1095,6 -1096,7 +1096,6 @@@ static void free_hot_cold_page(struct p
         arch_free_page(page, 0);
         kernel_map_pages(page, 1, 0);
   
- -      pcp = &zone_pcp(zone, get_cpu())->pcp;
         migratetype = get_pageblock_migratetype(page);
         set_page_private(page, migratetype);
         local_irq_save(flags);
@@@ -1117,7 -1119,6 +1118,7 @@@
                 migratetype = MIGRATE_MOVABLE;
         }
   
+ +      pcp = &this_cpu_ptr(zone->pageset)->pcp;
         if (cold)
                 list_add_tail(&page->lru, &pcp->lists[migratetype]);
         else
@@@ -1130,6 -1131,7 +1131,6 @@@
   
   out:
         local_irq_restore(flags);
- -      put_cpu();
   }
   
   void free_hot_page(struct page *page)
@@@ -1179,15 -1181,17 +1180,15 @@@ struct page *buffered_rmqueue(struct zo
         unsigned long flags;
         struct page *page;
         int cold = !!(gfp_flags & __GFP_COLD);
- -      int cpu;
   
   again:
- -      cpu  = get_cpu();
         if (likely(order == 0)) {
                 struct per_cpu_pages *pcp;
                 struct list_head *list;
   
- -              pcp = &zone_pcp(zone, cpu)->pcp;
- -              list = &pcp->lists[migratetype];
                 local_irq_save(flags);
+ +              pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ +              list = &pcp->lists[migratetype];
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
                                         pcp->batch, list,
@@@ -1219,15 -1223,16 +1220,15 @@@
                 }
                 spin_lock_irqsave(&zone->lock, flags);
                 page = __rmqueue(zone, order, migratetype);
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
+               __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
         }
   
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
         zone_statistics(preferred_zone, zone);
         local_irq_restore(flags);
- -      put_cpu();
   
         VM_BUG_ON(bad_range(zone, page));
         if (prep_new_page(page, order, gfp_flags))
@@@ -1236,6 -1241,7 +1237,6 @@@
   
   failed:
         local_irq_restore(flags);
- -      put_cpu();
         return NULL;
   }
   
@@@ -2174,7 -2180,7 +2175,7 @@@ void show_free_areas(void
                 for_each_online_cpu(cpu) {
                         struct per_cpu_pageset *pageset;
   
- -                      pageset = zone_pcp(zone, cpu);
+ +                      pageset = per_cpu_ptr(zone->pageset, cpu);
   
                         printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
                                cpu, pageset->pcp.high,
@@@ -2739,29 -2745,10 +2740,29 @@@ static void build_zonelist_cache(pg_dat
   
   #endif        /* CONFIG_NUMA */
   
+ +/*
+ + * Boot pageset table. One per cpu which is going to be used for all
+ + * zones and all nodes. The parameters will be set in such a way
+ + * that an item put on a list will immediately be handed over to
+ + * the buddy list. This is safe since pageset manipulation is done
+ + * with interrupts disabled.
+ + *
+ + * The boot_pagesets must be kept even after bootup is complete for
+ + * unused processors and/or zones. They do play a role for bootstrapping
+ + * hotplugged processors.
+ + *
+ + * zoneinfo_show() and maybe other functions do
+ + * not check if the processor is online before following the pageset pointer.
+ + * Other parts of the kernel may not check if the zone is available.
+ + */
+ +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+ +static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+ +
   /* return values int ....just for stop_machine() */
   static int __build_all_zonelists(void *dummy)
   {
         int nid;
+ +      int cpu;
   
   #ifdef CONFIG_NUMA
         memset(node_load, 0, sizeof(node_load));
@@@ -2772,23 -2759,6 +2773,23 @@@
                 build_zonelists(pgdat);
                 build_zonelist_cache(pgdat);
         }
+ +
+ +      /*
+ +       * Initialize the boot_pagesets that are going to be used
+ +       * for bootstrapping processors. The real pagesets for
+ +       * each zone will be allocated later when the per cpu
+ +       * allocator is available.
+ +       *
+ +       * boot_pagesets are used also for bootstrapping offline
+ +       * cpus if the system is already booted because the pagesets
+ +       * are needed to initialize allocators on a specific cpu too.
+ +       * F.e. the percpu allocator needs the page allocator which
+ +       * needs the percpu allocator in order to allocate its pagesets
+ +       * (a chicken-egg dilemma).
+ +       */
+ +      for_each_possible_cpu(cpu)
+ +              setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+ +
         return 0;
   }
   
@@@ -3126,33 -3096,121 +3127,33 @@@ static void setup_pagelist_highmark(str
                 pcp->batch = PAGE_SHIFT * 8;
   }
   
- -
- -#ifdef CONFIG_NUMA
- -/*
- - * Boot pageset table. One per cpu which is going to be used for all
- - * zones and all nodes. The parameters will be set in such a way
- - * that an item put on a list will immediately be handed over to
- - * the buddy list. This is safe since pageset manipulation is done
- - * with interrupts disabled.
- - *
- - * Some NUMA counter updates may also be caught by the boot pagesets.
- - *
- - * The boot_pagesets must be kept even after bootup is complete for
- - * unused processors and/or zones. They do play a role for bootstrapping
- - * hotplugged processors.
- - *
- - * zoneinfo_show() and maybe other functions do
- - * not check if the processor is online before following the pageset pointer.
- - * Other parts of the kernel may not check if the zone is available.
- - */
- -static struct per_cpu_pageset boot_pageset[NR_CPUS];
- -
   /*
- - * Dynamically allocate memory for the
- - * per cpu pageset array in struct zone.
+ + * Allocate per cpu pagesets and initialize them.
+ + * Before this call only boot pagesets were available.
+ + * Boot pagesets will no longer be used by this processor
+ + * after setup_per_cpu_pageset().
    */
- -static int __cpuinit process_zones(int cpu)
+ +void __init setup_per_cpu_pageset(void)
   {
- -      struct zone *zone, *dzone;
- -      int node = cpu_to_node(cpu);
- -
- -      node_set_state(node, N_CPU);    /* this node has a cpu */
+ +      struct zone *zone;
+ +      int cpu;
   
         for_each_populated_zone(zone) {
- -              zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
- -                                       GFP_KERNEL, node);
- -              if (!zone_pcp(zone, cpu))
- -                      goto bad;
- -
- -              setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
- -
- -              if (percpu_pagelist_fraction)
- -                      setup_pagelist_highmark(zone_pcp(zone, cpu),
- -                          (zone->present_pages / percpu_pagelist_fraction));
- -      }
- -
- -      return 0;
- -bad:
- -      for_each_zone(dzone) {
- -              if (!populated_zone(dzone))
- -                      continue;
- -              if (dzone == zone)
- -                      break;
- -              kfree(zone_pcp(dzone, cpu));
- -              zone_pcp(dzone, cpu) = &boot_pageset[cpu];
- -      }
- -      return -ENOMEM;
- -}
+ +              zone->pageset = alloc_percpu(struct per_cpu_pageset);
   
- -static inline void free_zone_pagesets(int cpu)
- -{
- -      struct zone *zone;
- -
- -      for_each_zone(zone) {
- -              struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+ +              for_each_possible_cpu(cpu) {
+ +                      struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
   
- -              /* Free per_cpu_pageset if it is slab allocated */
- -              if (pset != &boot_pageset[cpu])
- -                      kfree(pset);
- -              zone_pcp(zone, cpu) = &boot_pageset[cpu];
- -      }
- -}
+ +                      setup_pageset(pcp, zone_batchsize(zone));
   
- -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
- -              unsigned long action,
- -              void *hcpu)
- -{
- -      int cpu = (long)hcpu;
- -      int ret = NOTIFY_OK;
- -
- -      switch (action) {
- -      case CPU_UP_PREPARE:
- -      case CPU_UP_PREPARE_FROZEN:
- -              if (process_zones(cpu))
- -                      ret = NOTIFY_BAD;
- -              break;
- -      case CPU_UP_CANCELED:
- -      case CPU_UP_CANCELED_FROZEN:
- -      case CPU_DEAD:
- -      case CPU_DEAD_FROZEN:
- -              free_zone_pagesets(cpu);
- -              break;
- -      default:
- -              break;
+ +                      if (percpu_pagelist_fraction)
+ +                              setup_pagelist_highmark(pcp,
+ +                                      (zone->present_pages /
+ +                                              percpu_pagelist_fraction));
+ +              }
         }
- -      return ret;
   }
   
- -static struct notifier_block __cpuinitdata pageset_notifier =
- -      { &pageset_cpuup_callback, NULL, 0 };
- -
- -void __init setup_per_cpu_pageset(void)
- -{
- -      int err;
- -
- -      /* Initialize per_cpu_pageset for cpu 0.
- -       * A cpuup callback will do this for every cpu
- -       * as it comes online
- -       */
- -      err = process_zones(smp_processor_id());
- -      BUG_ON(err);
- -      register_cpu_notifier(&pageset_notifier);
- -}
- -
- -#endif
- -
   static noinline __init_refok
   int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
   {
@@@ -3206,7 -3264,7 +3207,7 @@@ static int __zone_pcp_update(void *data
                 struct per_cpu_pageset *pset;
                 struct per_cpu_pages *pcp;
   
- -              pset = zone_pcp(zone, cpu);
+ +              pset = per_cpu_ptr(zone->pageset, cpu);
                 pcp = &pset->pcp;
   
                 local_irq_save(flags);
@@@ -3224,17 -3282,21 +3225,17 @@@ void zone_pcp_update(struct zone *zone
   
   static __meminit void zone_pcp_init(struct zone *zone)
   {
- -      int cpu;
- -      unsigned long batch = zone_batchsize(zone);
+ +      /*
+ +       * per cpu subsystem is not up at this point. The following code
+ +       * relies on the ability of the linker to provide the
+ +       * offset of a (static) per cpu variable into the per cpu area.
+ +       */
+ +      zone->pageset = &boot_pageset;
   
- -      for (cpu = 0; cpu < NR_CPUS; cpu++) {
- -#ifdef CONFIG_NUMA
- -              /* Early boot. Slab allocator not functional yet */
- -              zone_pcp(zone, cpu) = &boot_pageset[cpu];
- -              setup_pageset(&boot_pageset[cpu],0);
- -#else
- -              setup_pageset(zone_pcp(zone,cpu), batch);
- -#endif
- -      }
         if (zone->present_pages)
- -              printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
- -                      zone->name, zone->present_pages, batch);
+ +              printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%u\n",
+ +                      zone->name, zone->present_pages,
+ +                                       zone_batchsize(zone));
   }
   
   __meminit int init_currently_empty_zone(struct zone *zone,
@@@ -3937,7 -3999,7 +3938,7 @@@ void __init add_active_range(unsigned i
                 }
   
                 /* Merge backward if suitable */
-               if (start_pfn < early_node_map[i].end_pfn &&
+               if (start_pfn < early_node_map[i].start_pfn &&
                                 end_pfn >= early_node_map[i].start_pfn) {
                         early_node_map[i].start_pfn = start_pfn;
                         return;
@@@ -4748,11 -4810,10 +4749,11 @@@ int percpu_pagelist_fraction_sysctl_han
         if (!write || (ret == -EINVAL))
                 return ret;
         for_each_populated_zone(zone) {
- -              for_each_online_cpu(cpu) {
+ +              for_each_possible_cpu(cpu) {
                         unsigned long  high;
                         high = zone->present_pages / percpu_pagelist_fraction;
- -                      setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+ +                      setup_pagelist_highmark(
+ +                              per_cpu_ptr(zone->pageset, cpu), high);
                 }
         }
         return 0;
diff --combined mm/percpu.c

index 626e43c,083e7c9..b336638
--- 1/mm/percpu.c
--- 2/mm/percpu.c
+++ b/mm/percpu.c
@@@ -913,10 -913,11 +913,10 @@@ static void pcpu_depopulate_chunk(struc
         int rs, re;
   
         /* quick path, check whether it's empty already */
- -      pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
- -              if (rs == page_start && re == page_end)
- -                      return;
- -              break;
- -      }
+ +      rs = page_start;
+ +      pcpu_next_unpop(chunk, &rs, &re, page_end);
+ +      if (rs == page_start && re == page_end)
+ +              return;
   
         /* immutable chunks can't be depopulated */
         WARN_ON(chunk->immutable);
@@@ -967,10 -968,11 +967,10 @@@ static int pcpu_populate_chunk(struct p
         int rs, re, rc;
   
         /* quick path, check whether all pages are already there */
- -      pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
- -              if (rs == page_start && re == page_end)
- -                      goto clear;
- -              break;
- -      }
+ +      rs = page_start;
+ +      pcpu_next_pop(chunk, &rs, &re, page_end);
+ +      if (rs == page_start && re == page_end)
+ +              goto clear;
   
         /* need to allocate and map pages, this chunk can't be immutable */
         WARN_ON(chunk->immutable);
@@@ -1269,7 -1271,7 +1269,7 @@@ static void pcpu_reclaim(struct work_st
    */
   void free_percpu(void *ptr)
   {
-       void *addr = __pcpu_ptr_to_addr(ptr);
+       void *addr;
         struct pcpu_chunk *chunk;
         unsigned long flags;
         int off;
@@@ -1277,6 -1279,8 +1277,8 @@@
         if (!ptr)
                 return;
   
+       addr = __pcpu_ptr_to_addr(ptr);
+ 
         spin_lock_irqsave(&pcpu_lock, flags);
   
         chunk = pcpu_chunk_addr_search(addr);
author	Tejun Heo <tj@kernel.org>
	Tue, 2 Feb 2010 05:38:15 +0000 (14:38 +0900)
committer	Tejun Heo <tj@kernel.org>
	Tue, 2 Feb 2010 05:38:15 +0000 (14:38 +0900)
		1	2
arch/sparc/kernel/nmi.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/ring_buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page_alloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/percpu.c	patch \|	diff1 \|	diff2 \|	blob \| history