Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
author     Rusty Russell <rusty@rustcorp.com.au>
           Mon, 29 Dec 2008 21:32:35 +0000 (08:02 +1030)
committer  Rusty Russell <rusty@rustcorp.com.au>
           Mon, 29 Dec 2008 21:32:35 +0000 (08:02 +1030)
24 files changed:
arch/m32r/Kconfig
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/time.c
arch/powerpc/platforms/pseries/xics.c
arch/powerpc/sysdev/mpic.c
arch/s390/Kconfig
arch/s390/kernel/smp.c
arch/s390/kernel/time.c
arch/s390/kernel/topology.c
arch/x86/include/asm/pci.h
arch/x86/kernel/apic.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/smpboot.c
arch/x86/lguest/boot.c
include/linux/smp.h
init/Kconfig
kernel/profile.c
kernel/sched.c
kernel/sched_stats.h
kernel/trace/trace.c
lib/Kconfig

diff --combined arch/m32r/Kconfig
@@@ -10,7 -10,6 +10,7 @@@ config M32
        default y
        select HAVE_IDE
        select HAVE_OPROFILE
 +      select INIT_ALL_POSSIBLE
  
  config SBUS
        bool
@@@ -274,7 -273,7 +274,7 @@@ config GENERIC_CALIBRATE_DELA
        bool
        default y
  
- config SCHED_NO_NO_OMIT_FRAME_POINTER
+ config SCHED_OMIT_FRAME_POINTER
          bool
          default y
  
diff --combined arch/powerpc/kernel/smp.c
  #define DBG(fmt...)
  #endif
  
- int smp_hw_index[NR_CPUS];
  struct thread_info *secondary_ti;
  
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
  
 -EXPORT_SYMBOL(cpu_online_map);
 -EXPORT_SYMBOL(cpu_possible_map);
  EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  EXPORT_PER_CPU_SYMBOL(cpu_core_map);
  
@@@ -119,6 -122,65 +118,65 @@@ void smp_message_recv(int msg
        }
  }
  
+ static irqreturn_t call_function_action(int irq, void *data)
+ {
+       generic_smp_call_function_interrupt();
+       return IRQ_HANDLED;
+ }
+ static irqreturn_t reschedule_action(int irq, void *data)
+ {
+       /* we just need the return path side effect of checking need_resched */
+       return IRQ_HANDLED;
+ }
+ static irqreturn_t call_function_single_action(int irq, void *data)
+ {
+       generic_smp_call_function_single_interrupt();
+       return IRQ_HANDLED;
+ }
+ static irqreturn_t debug_ipi_action(int irq, void *data)
+ {
+       smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+       return IRQ_HANDLED;
+ }
+ static irq_handler_t smp_ipi_action[] = {
+       [PPC_MSG_CALL_FUNCTION] =  call_function_action,
+       [PPC_MSG_RESCHEDULE] = reschedule_action,
+       [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+       [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+ };
+ const char *smp_ipi_name[] = {
+       [PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+       [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+       [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+       [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ };
+ /* optional function to request ipi, for controllers with >= 4 ipis */
+ int smp_request_message_ipi(int virq, int msg)
+ {
+       int err;
+       if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+               return -EINVAL;
+       }
+ #if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+       if (msg == PPC_MSG_DEBUGGER_BREAK) {
+               return 1;
+       }
+ #endif
+       err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
+                         smp_ipi_name[msg], 0);
+       WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+               virq, smp_ipi_name[msg], err);
+       return err;
+ }
  void smp_send_reschedule(int cpu)
  {
        if (likely(smp_ops))
@@@ -404,8 -466,7 +462,7 @@@ out
  static struct device_node *cpu_to_l2cache(int cpu)
  {
        struct device_node *np;
-       const phandle *php;
-       phandle ph;
+       struct device_node *cache;
  
        if (!cpu_present(cpu))
                return NULL;
        if (np == NULL)
                return NULL;
  
-       php = of_get_property(np, "l2-cache", NULL);
-       if (php == NULL)
-               return NULL;
-       ph = *php;
+       cache = of_find_next_cache_node(np);
        of_node_put(np);
  
-       return of_find_node_by_phandle(ph);
+       return cache;
  }
  
  /* Activate a secondary processor. */
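
Note on the new helper: smp_request_message_ipi() above is, per its comment, only meant for controllers that expose at least four hardware IPIs (the mpic conversion further down uses it). A controller with a single IPI can keep demultiplexing the PPC_MSG_* messages itself and feed each one to smp_message_recv(); a hypothetical sketch of that case, where my_read_and_ack_ipi_bits() is a made-up stand-in for whatever the controller provides:

static irqreturn_t single_ipi_demux_action(int irq, void *data)
{
	/* my_read_and_ack_ipi_bits() is a made-up stand-in that fetches and
	 * clears the controller's pending IPI message bits. */
	unsigned long bits = my_read_and_ack_ipi_bits();
	int msg;

	for (msg = 0; msg <= PPC_MSG_DEBUGGER_BREAK; msg++)
		if (bits & (1UL << msg))
			smp_message_recv(msg);
	return IRQ_HANDLED;
}
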
diff --combined arch/powerpc/kernel/time.c
@@@ -164,8 -164,6 +164,6 @@@ static u64 tb_to_ns_scale __read_mostly
  static unsigned tb_to_ns_shift __read_mostly;
  static unsigned long boot_tb __read_mostly;
  
- static struct gettimeofday_struct do_gtod;
  extern struct timezone sys_tz;
  static long timezone_offset;
  
@@@ -415,31 -413,9 +413,9 @@@ void udelay(unsigned long usecs
  }
  EXPORT_SYMBOL(udelay);
  
  static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
                               u64 new_tb_to_xs)
  {
-       unsigned temp_idx;
-       struct gettimeofday_vars *temp_varp;
-       temp_idx = (do_gtod.var_idx == 0);
-       temp_varp = &do_gtod.vars[temp_idx];
-       temp_varp->tb_to_xs = new_tb_to_xs;
-       temp_varp->tb_orig_stamp = new_tb_stamp;
-       temp_varp->stamp_xsec = new_stamp_xsec;
-       smp_mb();
-       do_gtod.varp = temp_varp;
-       do_gtod.var_idx = temp_idx;
        /*
         * tb_update_count is used to allow the userspace gettimeofday code
         * to assure itself that it sees a consistent view of the tb_to_xs and
        vdso_data->tb_to_xs = new_tb_to_xs;
        vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
        vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+       vdso_data->stamp_xtime = xtime;
        smp_wmb();
        ++(vdso_data->tb_update_count);
  }
@@@ -514,9 -491,7 +491,7 @@@ static int __init iSeries_tb_recal(void
                                tb_ticks_per_sec   = new_tb_ticks_per_sec;
                                calc_cputime_factors();
                                div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
-                               do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
                                tb_to_xs = divres.result_low;
-                               do_gtod.varp->tb_to_xs = tb_to_xs;
                                vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
                                vdso_data->tb_to_xs = tb_to_xs;
                        }
@@@ -869,7 -844,7 +844,7 @@@ static void register_decrementer_clocke
        struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
  
        *dec = decrementer_clockevent;
 -      dec->cpumask = cpumask_of_cpu(cpu);
 +      dec->cpumask = cpumask_of(cpu);
  
        printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
               dec->name, dec->mult, dec->shift, cpu);
@@@ -988,15 -963,6 +963,6 @@@ void __init time_init(void
                sys_tz.tz_dsttime = 0;
          }
  
-       do_gtod.varp = &do_gtod.vars[0];
-       do_gtod.var_idx = 0;
-       do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
-       __get_cpu_var(last_jiffy) = tb_last_jiffy;
-       do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
-       do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
-       do_gtod.varp->tb_to_xs = tb_to_xs;
-       do_gtod.tb_to_us = tb_to_us;
        vdso_data->tb_orig_stamp = tb_last_jiffy;
        vdso_data->tb_update_count = 0;
        vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
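
The tb_update_count comment above describes a seqcount-style handshake: the writer bumps the counter, publishes the new vdso_data fields, then bumps it again (the s390 update_vsyscall() later in this merge shows both increments explicitly), so the userspace gettimeofday code retries whenever the counter is odd or changes across its reads. A minimal user-space sketch of that reader side, assuming an illustrative mirror struct rather than the real vDSO layout:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative stand-in for the few vdso_data fields used here. */
struct vdso_snapshot {
	_Atomic uint32_t tb_update_count;   /* odd while an update is in flight */
	uint64_t stamp_sec;
	uint64_t stamp_nsec;
};

/* Retry until the counter is even and unchanged around the data reads. */
static void read_stamp(const struct vdso_snapshot *v,
		       uint64_t *sec, uint64_t *nsec)
{
	uint32_t seq;

	do {
		seq = atomic_load_explicit(&v->tb_update_count,
					   memory_order_acquire);
		*sec = v->stamp_sec;
		*nsec = v->stamp_nsec;
		atomic_thread_fence(memory_order_acquire);
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&v->tb_update_count,
					     memory_order_relaxed));
}
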
diff --combined arch/powerpc/platforms/pseries/xics.c
@@@ -332,7 -332,7 +332,7 @@@ static void xics_eoi_lpar(unsigned int 
        lpar_xirr_info_set((0xff << 24) | irq);
  }
  
 -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 +static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
  {
        unsigned int irq;
        int status;
@@@ -579,7 -579,7 +579,7 @@@ static void xics_update_irq_servers(voi
        int i, j;
        struct device_node *np;
        u32 ilen;
-       const u32 *ireg, *isize;
+       const u32 *ireg;
        u32 hcpuid;
  
        /* Find the server numbers for the boot cpu. */
                }
        }
  
-       /* get the bit size of server numbers */
-       isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-       if (isize)
-               interrupt_server_size = *isize;
        of_node_put(np);
  }
  
@@@ -682,6 -677,7 +677,7 @@@ void __init xics_init_IRQ(void
        struct device_node *np;
        u32 indx = 0;
        int found = 0;
+       const u32 *isize;
  
        ppc64_boot_msg(0x20, "XICS Init");
  
        if (found == 0)
                return;
  
+       /* get the bit size of server numbers */
+       found = 0;
+       for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
+               isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+               if (!isize)
+                       continue;
+               if (!found) {
+                       interrupt_server_size = *isize;
+                       found = 1;
+               } else if (*isize != interrupt_server_size) {
+                       printk(KERN_WARNING "XICS: "
+                              "mismatched ibm,interrupt-server#-size\n");
+                       interrupt_server_size = max(*isize,
+                                                   interrupt_server_size);
+               }
+       }
        xics_update_irq_servers();
        xics_init_host();
  
@@@ -728,9 -744,18 +744,18 @@@ static void xics_set_cpu_priority(unsig
  /* Have the calling processor join or leave the specified global queue */
  static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
  {
-       int status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE,
-               (1UL << interrupt_server_size) - 1 - gserver, join);
-       WARN_ON(status < 0);
+       int index;
+       int status;
+       if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+               return;
+       index = (1UL << interrupt_server_size) - 1 - gserver;
+       status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+       WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+            GLOBAL_INTERRUPT_QUEUE, index, join, status);
  }
  
  void xics_setup_cpu(void)
@@@ -845,7 -870,7 +870,7 @@@ void xics_migrate_irqs_away(void
  
                /* Reset affinity to all cpus */
                irq_desc[virq].affinity = CPU_MASK_ALL;
 -              desc->chip->set_affinity(virq, CPU_MASK_ALL);
 +              desc->chip->set_affinity(virq, cpu_all_mask);
  unlock:
                spin_unlock_irqrestore(&desc->lock, flags);
        }
diff --combined arch/powerpc/sysdev/mpic.c
@@@ -661,17 -661,6 +661,6 @@@ static inline void mpic_eoi(struct mpi
        (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
  }
  
- #ifdef CONFIG_SMP
- static irqreturn_t mpic_ipi_action(int irq, void *data)
- {
-       long ipi = (long)data;
-       smp_message_recv(ipi);
-       return IRQ_HANDLED;
- }
- #endif /* CONFIG_SMP */
  /*
   * Linux descriptor level callbacks
   */
@@@ -817,7 -806,7 +806,7 @@@ static void mpic_end_ipi(unsigned int i
  
  #endif /* CONFIG_SMP */
  
 -void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 +void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
  {
        struct mpic *mpic = mpic_from_irq(irq);
        unsigned int src = mpic_irq_to_hw(irq);
        } else {
                cpumask_t tmp;
  
 -              cpus_and(tmp, cpumask, cpu_online_map);
 +              cpumask_and(&tmp, cpumask, cpu_online_mask);
  
                mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
                               mpic_physmask(cpus_addr(tmp)[0]));
@@@ -1548,13 -1537,7 +1537,7 @@@ unsigned int mpic_get_mcirq(void
  void mpic_request_ipis(void)
  {
        struct mpic *mpic = mpic_primary;
-       long i, err;
-       static char *ipi_names[] = {
-               "IPI0 (call function)",
-               "IPI1 (reschedule)",
-               "IPI2 (call function single)",
-               "IPI3 (debugger break)",
-       };
+       int i;
        BUG_ON(mpic == NULL);
  
        printk(KERN_INFO "mpic: requesting IPIs ... \n");
                unsigned int vipi = irq_create_mapping(mpic->irqhost,
                                                       mpic->ipi_vecs[0] + i);
                if (vipi == NO_IRQ) {
-                       printk(KERN_ERR "Failed to map IPI %ld\n", i);
-                       break;
-               }
-               err = request_irq(vipi, mpic_ipi_action,
-                                 IRQF_DISABLED|IRQF_PERCPU,
-                                 ipi_names[i], (void *)i);
-               if (err) {
-                       printk(KERN_ERR "Request of irq %d for IPI %ld failed\n",
-                              vipi, i);
-                       break;
+                       printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]);
+                       continue;
                }
+               smp_request_message_ipi(vipi, i);
        }
  }
  
diff --combined arch/s390/Kconfig
@@@ -43,6 -43,9 +43,9 @@@ config GENERIC_HWEIGH
  config GENERIC_TIME
        def_bool y
  
+ config GENERIC_TIME_VSYSCALL
+       def_bool y
  config GENERIC_CLOCKEVENTS
        def_bool y
  
@@@ -66,16 -69,20 +69,21 @@@ config PGST
        bool
        default y if KVM
  
+ config VIRT_CPU_ACCOUNTING
+       def_bool y
  mainmenu "Linux Kernel Configuration"
  
  config S390
        def_bool y
+       select USE_GENERIC_SMP_HELPERS if SMP
+       select HAVE_FUNCTION_TRACER
        select HAVE_OPROFILE
        select HAVE_KPROBES
        select HAVE_KRETPROBES
        select HAVE_KVM if 64BIT
        select HAVE_ARCH_TRACEHOOK
 +      select INIT_ALL_POSSIBLE
  
  source "init/Kconfig"
  
@@@ -226,6 -233,14 +234,14 @@@ config MARCH_Z9_10
          Class (z9 BC). The kernel will be slightly faster but will not
          work on older machines such as the z990, z890, z900, and z800.
  
+ config MARCH_Z10
+       bool "IBM System z10"
+       help
+         Select this to enable optimizations for IBM System z10. The
+         kernel will be slightly faster but will not work on older
+         machines such as the z990, z890, z900, z800, z9-109, z9-ec
+         and z9-bc.
  endchoice
  
  config PACK_STACK
@@@ -344,16 -359,6 +360,6 @@@ config QDI
  
          If unsure, say Y.
  
- config QDIO_DEBUG
-       bool "Extended debugging information"
-       depends on QDIO
-       help
-         Say Y here to get extended debugging output in
-           /sys/kernel/debug/s390dbf/qdio...
-         Warning: this option reduces the performance of the QDIO module.
-         If unsure, say N.
  config CHSC_SCH
        tristate "Support for CHSC subchannels"
        help
@@@ -467,22 -472,9 +473,9 @@@ config PAGE_STATE
          hypervisor. The ESSA instruction is used to do the states
          changes between a page that has content and the unused state.
  
- config VIRT_TIMER
-       bool "Virtual CPU timer support"
-       help
-         This provides a kernel interface for virtual CPU timers.
-         Default is disabled.
- config VIRT_CPU_ACCOUNTING
-       bool "Base user process accounting on virtual cpu timer"
-       depends on VIRT_TIMER
-       help
-         Select this option to use CPU timer deltas to do user
-         process accounting.
  config APPLDATA_BASE
        bool "Linux - VM Monitor Stream, base infrastructure"
-       depends on PROC_FS && VIRT_TIMER=y
+       depends on PROC_FS
        help
          This provides a kernel interface for creating and updating z/VM APPLDATA
          monitor records. The monitor records are updated at certain time
diff --combined arch/s390/kernel/smp.c
@@@ -20,6 -20,9 +20,9 @@@
   * cpu_number_map in other architectures.
   */
  
+ #define KMSG_COMPONENT "cpu"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/mm.h>
  struct _lowcore *lowcore_ptr[NR_CPUS];
  EXPORT_SYMBOL(lowcore_ptr);
  
 -cpumask_t cpu_online_map = CPU_MASK_NONE;
 -EXPORT_SYMBOL(cpu_online_map);
 -
 -cpumask_t cpu_possible_map = CPU_MASK_ALL;
 -EXPORT_SYMBOL(cpu_possible_map);
 -
  static struct task_struct *current_set[NR_CPUS];
  
  static u8 smp_cpu_type;
@@@ -71,159 -80,6 +74,6 @@@ static DEFINE_PER_CPU(struct cpu, cpu_d
  
  static void smp_ext_bitcall(int, ec_bit_sig);
  
- /*
-  * Structure and data for __smp_call_function_map(). This is designed to
-  * minimise static memory requirements. It also looks cleaner.
-  */
- static DEFINE_SPINLOCK(call_lock);
- struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       cpumask_t started;
-       cpumask_t finished;
-       int wait;
- };
- static struct call_data_struct *call_data;
- /*
-  * 'Call function' interrupt callback
-  */
- static void do_call_function(void)
- {
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       int wait = call_data->wait;
-       cpu_set(smp_processor_id(), call_data->started);
-       (*func)(info);
-       if (wait)
-               cpu_set(smp_processor_id(), call_data->finished);;
- }
- static void __smp_call_function_map(void (*func) (void *info), void *info,
-                                   int wait, cpumask_t map)
- {
-       struct call_data_struct data;
-       int cpu, local = 0;
-       /*
-        * Can deadlock when interrupts are disabled or if in wrong context.
-        */
-       WARN_ON(irqs_disabled() || in_irq());
-       /*
-        * Check for local function call. We have to have the same call order
-        * as in on_each_cpu() because of machine_restart_smp().
-        */
-       if (cpu_isset(smp_processor_id(), map)) {
-               local = 1;
-               cpu_clear(smp_processor_id(), map);
-       }
-       cpus_and(map, map, cpu_online_map);
-       if (cpus_empty(map))
-               goto out;
-       data.func = func;
-       data.info = info;
-       data.started = CPU_MASK_NONE;
-       data.wait = wait;
-       if (wait)
-               data.finished = CPU_MASK_NONE;
-       call_data = &data;
-       for_each_cpu_mask(cpu, map)
-               smp_ext_bitcall(cpu, ec_call_function);
-       /* Wait for response */
-       while (!cpus_equal(map, data.started))
-               cpu_relax();
-       if (wait)
-               while (!cpus_equal(map, data.finished))
-                       cpu_relax();
- out:
-       if (local) {
-               local_irq_disable();
-               func(info);
-               local_irq_enable();
-       }
- }
- /*
-  * smp_call_function:
-  * @func: the function to run; this must be fast and non-blocking
-  * @info: an arbitrary pointer to pass to the function
-  * @wait: if true, wait (atomically) until function has completed on other CPUs
-  *
-  * Run a function on all other CPUs.
-  *
-  * You must not call this function with disabled interrupts, from a
-  * hardware interrupt handler or from a bottom half.
-  */
- int smp_call_function(void (*func) (void *info), void *info, int wait)
- {
-       cpumask_t map;
-       spin_lock(&call_lock);
-       map = cpu_online_map;
-       cpu_clear(smp_processor_id(), map);
-       __smp_call_function_map(func, info, wait, map);
-       spin_unlock(&call_lock);
-       return 0;
- }
- EXPORT_SYMBOL(smp_call_function);
- /*
-  * smp_call_function_single:
-  * @cpu: the CPU where func should run
-  * @func: the function to run; this must be fast and non-blocking
-  * @info: an arbitrary pointer to pass to the function
-  * @wait: if true, wait (atomically) until function has completed on other CPUs
-  *
-  * Run a function on one processor.
-  *
-  * You must not call this function with disabled interrupts, from a
-  * hardware interrupt handler or from a bottom half.
-  */
- int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-                            int wait)
- {
-       spin_lock(&call_lock);
-       __smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
-       spin_unlock(&call_lock);
-       return 0;
- }
- EXPORT_SYMBOL(smp_call_function_single);
- /**
-  * smp_call_function_mask(): Run a function on a set of other CPUs.
-  * @mask: The set of cpus to run on.  Must not include the current cpu.
-  * @func: The function to run. This must be fast and non-blocking.
-  * @info: An arbitrary pointer to pass to the function.
-  * @wait: If true, wait (atomically) until function has completed on other CPUs.
-  *
-  * Returns 0 on success, else a negative status code.
-  *
-  * If @wait is true, then returns once @func has returned; otherwise
-  * it returns just before the target cpu calls @func.
-  *
-  * You must not call this function with disabled interrupts or from a
-  * hardware interrupt handler or from a bottom half handler.
-  */
- int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
-                          int wait)
- {
-       spin_lock(&call_lock);
-       cpu_clear(smp_processor_id(), mask);
-       __smp_call_function_map(func, info, wait, mask);
-       spin_unlock(&call_lock);
-       return 0;
- }
- EXPORT_SYMBOL(smp_call_function_mask);
  void smp_send_stop(void)
  {
        int cpu, rc;
@@@ -265,7 -121,10 +115,10 @@@ static void do_ext_call_interrupt(__u1
        bits = xchg(&S390_lowcore.ext_call_fast, 0);
  
        if (test_bit(ec_call_function, &bits))
-               do_call_function();
+               generic_smp_call_function_interrupt();
+       if (test_bit(ec_call_function_single, &bits))
+               generic_smp_call_function_single_interrupt();
  }
  
  /*
@@@ -282,6 -141,19 +135,19 @@@ static void smp_ext_bitcall(int cpu, ec
                udelay(10);
  }
  
+ void arch_send_call_function_ipi(cpumask_t mask)
+ {
+       int cpu;
+       for_each_cpu_mask(cpu, mask)
+               smp_ext_bitcall(cpu, ec_call_function);
+ }
+ void arch_send_call_function_single_ipi(int cpu)
+ {
+       smp_ext_bitcall(cpu, ec_call_function_single);
+ }
  #ifndef CONFIG_64BIT
  /*
   * this function sends a 'purge tlb' signal to another CPU.
@@@ -382,8 -254,8 +248,8 @@@ static void __init smp_get_save_area(un
        if (ipl_info.type != IPL_TYPE_FCP_DUMP)
                return;
        if (cpu >= NR_CPUS) {
-               printk(KERN_WARNING "Registers for cpu %i not saved since dump "
-                      "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
+               pr_warning("CPU %i exceeds the maximum %i and is excluded from "
+                          "the dump\n", cpu, NR_CPUS - 1);
                return;
        }
        zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
@@@ -556,7 -428,7 +422,7 @@@ static void __init smp_detect_cpus(void
        }
  out:
        kfree(info);
-       printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
+       pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
        get_online_cpus();
        __smp_rescan_cpus();
        put_online_cpus();
@@@ -572,19 -444,17 +438,17 @@@ int __cpuinit start_secondary(void *cpu
        preempt_disable();
        /* Enable TOD clock interrupts on the secondary cpu. */
        init_cpu_timer();
- #ifdef CONFIG_VIRT_TIMER
        /* Enable cpu timer interrupts on the secondary cpu. */
        init_cpu_vtimer();
- #endif
        /* Enable pfault pseudo page faults on this cpu. */
        pfault_init();
  
        /* call cpu notifiers */
        notify_cpu_starting(smp_processor_id());
        /* Mark this cpu as online */
-       spin_lock(&call_lock);
+       ipi_call_lock();
        cpu_set(smp_processor_id(), cpu_online_map);
-       spin_unlock(&call_lock);
+       ipi_call_unlock();
        /* Switch on interrupts */
        local_irq_enable();
        /* Print info about this processor */
@@@ -633,18 -503,15 +497,15 @@@ static int __cpuinit smp_alloc_lowcore(
  
                save_area = get_zeroed_page(GFP_KERNEL);
                if (!save_area)
-                       goto out_save_area;
+                       goto out;
                lowcore->extended_save_area_addr = (u32) save_area;
        }
  #endif
        lowcore_ptr[cpu] = lowcore;
        return 0;
  
- #ifndef CONFIG_64BIT
- out_save_area:
-       free_page(panic_stack);
- #endif
  out:
+       free_page(panic_stack);
        free_pages(async_stack, ASYNC_ORDER);
        free_pages((unsigned long) lowcore, lc_order);
        return -ENOMEM;
@@@ -684,12 -551,8 +545,8 @@@ int __cpuinit __cpu_up(unsigned int cpu
  
        ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
                                   cpu, sigp_set_prefix);
-       if (ccode) {
-               printk("sigp_set_prefix failed for cpu %d "
-                      "with condition code %d\n",
-                      (int) cpu, (int) ccode);
+       if (ccode)
                return -EIO;
-       }
  
        idle = current_set[cpu];
        cpu_lowcore = lowcore_ptr[cpu];
@@@ -772,7 -635,7 +629,7 @@@ void __cpu_die(unsigned int cpu
        while (!smp_cpu_not_running(cpu))
                cpu_relax();
        smp_free_lowcore(cpu);
-       printk(KERN_INFO "Processor %d spun down\n", cpu);
+       pr_info("Processor %d stopped\n", cpu);
  }
  
  void cpu_die(void)
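
With the open-coded s390 smp_call_function*() variants deleted above, those entry points now come from the generic helpers in kernel/smp.c (enabled by the new "select USE_GENERIC_SMP_HELPERS if SMP"); the architecture only supplies arch_send_call_function_ipi()/arch_send_call_function_single_ipi() and forwards the IPIs to generic_smp_call_function_interrupt(), exactly as the hunk shows. A brief caller-side sketch of the resulting API, with an illustrative callback and CPU number:

#include <linux/smp.h>

static void poke_remote(void *info)
{
	/* Runs on the target CPU in interrupt context; keep it short. */
}

static void example_cross_call(void)
{
	/* Run on one specific CPU and wait until it has completed. */
	smp_call_function_single(1, poke_remote, NULL, 1);

	/* Run on all other online CPUs and wait for them. */
	smp_call_function(poke_remote, NULL, 1);
}
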
diff --combined arch/s390/kernel/time.c
@@@ -12,6 -12,9 +12,9 @@@
   *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
   */
  
+ #define KMSG_COMPONENT "time"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  #include <linux/errno.h>
  #include <linux/module.h>
  #include <linux/sched.h>
@@@ -20,6 -23,8 +23,8 @@@
  #include <linux/string.h>
  #include <linux/mm.h>
  #include <linux/interrupt.h>
+ #include <linux/cpu.h>
+ #include <linux/stop_machine.h>
  #include <linux/time.h>
  #include <linux/sysdev.h>
  #include <linux/delay.h>
@@@ -36,6 -41,7 +41,7 @@@
  #include <asm/delay.h>
  #include <asm/s390_ext.h>
  #include <asm/div64.h>
+ #include <asm/vdso.h>
  #include <asm/irq.h>
  #include <asm/irq_regs.h>
  #include <asm/timer.h>
@@@ -154,7 -160,7 +160,7 @@@ void init_cpu_timer(void
        cd->min_delta_ns        = 1;
        cd->max_delta_ns        = LONG_MAX;
        cd->rating              = 400;
 -      cd->cpumask             = cpumask_of_cpu(cpu);
 +      cd->cpumask             = cpumask_of(cpu);
        cd->set_next_event      = s390_next_event;
        cd->set_mode            = s390_set_mode;
  
@@@ -223,6 -229,36 +229,36 @@@ static struct clocksource clocksource_t
  };
  
  
+ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ {
+       if (clock != &clocksource_tod)
+               return;
+       /* Make userspace gettimeofday spin until we're done. */
+       ++vdso_data->tb_update_count;
+       smp_wmb();
+       vdso_data->xtime_tod_stamp = clock->cycle_last;
+       vdso_data->xtime_clock_sec = xtime.tv_sec;
+       vdso_data->xtime_clock_nsec = xtime.tv_nsec;
+       vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+       vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+       smp_wmb();
+       ++vdso_data->tb_update_count;
+ }
+ extern struct timezone sys_tz;
+ void update_vsyscall_tz(void)
+ {
+       /* Make userspace gettimeofday spin until we're done. */
+       ++vdso_data->tb_update_count;
+       smp_wmb();
+       vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+       vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+       smp_wmb();
+       ++vdso_data->tb_update_count;
+ }
  /*
   * Initialize the TOD clock and the CPU timer of
   * the boot cpu.
@@@ -253,10 -289,8 +289,8 @@@ void __init time_init(void
  
        /* Enable TOD clock interrupts on the boot cpu. */
        init_cpu_timer();
- #ifdef CONFIG_VIRT_TIMER
+       /* Enable cpu timer interrupts on the boot cpu. */
        vtime_init();
- #endif
  }
  
  /*
@@@ -288,8 -322,8 +322,8 @@@ static unsigned long long adjust_time(u
        }
        sched_clock_base_cc += delta;
        if (adjust.offset != 0) {
-               printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
-                      adjust.offset);
+               pr_notice("The ETR interface has adjusted the clock "
+                         "by %li microseconds\n", adjust.offset);
                adjust.modes = ADJ_OFFSET_SINGLESHOT;
                do_adjtimex(&adjust);
        }
@@@ -360,6 -394,15 +394,15 @@@ static void enable_sync_clock(void
        atomic_set_mask(0x80000000, sw_ptr);
  }
  
+ /* Single threaded workqueue used for etr and stp sync events */
+ static struct workqueue_struct *time_sync_wq;
+ static void __init time_init_wq(void)
+ {
+       if (!time_sync_wq)
+               time_sync_wq = create_singlethread_workqueue("timesync");
+ }
  /*
   * External Time Reference (ETR) code.
   */
@@@ -425,6 -468,7 +468,7 @@@ static struct timer_list etr_timer
  
  static void etr_timeout(unsigned long dummy);
  static void etr_work_fn(struct work_struct *work);
+ static DEFINE_MUTEX(etr_work_mutex);
  static DECLARE_WORK(etr_work, etr_work_fn);
  
  /*
@@@ -440,8 -484,8 +484,8 @@@ static void etr_reset(void
                etr_tolec = get_clock();
                set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
        } else if (etr_port0_online || etr_port1_online) {
-               printk(KERN_WARNING "Running on non ETR capable "
-                      "machine, only local mode available.\n");
+               pr_warning("The real or virtual hardware system does "
+                          "not provide an ETR interface\n");
                etr_port0_online = etr_port1_online = 0;
        }
  }
@@@ -452,17 -496,18 +496,18 @@@ static int __init etr_init(void
  
        if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
                return 0;
+       time_init_wq();
        /* Check if this machine has the steai instruction. */
        if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
                etr_steai_available = 1;
        setup_timer(&etr_timer, etr_timeout, 0UL);
        if (etr_port0_online) {
                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-               schedule_work(&etr_work);
+               queue_work(time_sync_wq, &etr_work);
        }
        if (etr_port1_online) {
                set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-               schedule_work(&etr_work);
+               queue_work(time_sync_wq, &etr_work);
        }
        return 0;
  }
@@@ -489,7 -534,7 +534,7 @@@ void etr_switch_to_local(void
        if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
                disable_sync_clock(NULL);
        set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
-       schedule_work(&etr_work);
+       queue_work(time_sync_wq, &etr_work);
  }
  
  /*
@@@ -505,7 -550,7 +550,7 @@@ void etr_sync_check(void
        if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
                disable_sync_clock(NULL);
        set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
-       schedule_work(&etr_work);
+       queue_work(time_sync_wq, &etr_work);
  }
  
  /*
@@@ -529,13 -574,13 +574,13 @@@ static void etr_timing_alert(struct etr
                 * Both ports are not up-to-date now.
                 */
                set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
-       schedule_work(&etr_work);
+       queue_work(time_sync_wq, &etr_work);
  }
  
  static void etr_timeout(unsigned long dummy)
  {
        set_bit(ETR_EVENT_UPDATE, &etr_events);
-       schedule_work(&etr_work);
+       queue_work(time_sync_wq, &etr_work);
  }
  
  /*
@@@ -642,14 -687,16 +687,16 @@@ static int etr_aib_follows(struct etr_a
  }
  
  struct clock_sync_data {
+       atomic_t cpus;
        int in_sync;
        unsigned long long fixup_cc;
+       int etr_port;
+       struct etr_aib *etr_aib;
  };
  
- static void clock_sync_cpu_start(void *dummy)
+ static void clock_sync_cpu(struct clock_sync_data *sync)
  {
-       struct clock_sync_data *sync = dummy;
+       atomic_dec(&sync->cpus);
        enable_sync_clock();
        /*
         * This looks like a busy wait loop but it isn't. etr_sync_cpus
        fixup_clock_comparator(sync->fixup_cc);
  }
  
- static void clock_sync_cpu_end(void *dummy)
- {
- }
  /*
   * Sync the TOD clock using the port refered to by aibp. This port
   * has to be enabled and the other port has to be disabled. The
   * last eacr update has to be more than 1.6 seconds in the past.
   */
- static int etr_sync_clock(struct etr_aib *aib, int port)
+ static int etr_sync_clock(void *data)
  {
-       struct etr_aib *sync_port;
-       struct clock_sync_data etr_sync;
+       static int first;
        unsigned long long clock, old_clock, delay, delta;
-       int follows;
+       struct clock_sync_data *etr_sync;
+       struct etr_aib *sync_port, *aib;
+       int port;
        int rc;
  
-       /* Check if the current aib is adjacent to the sync port aib. */
-       sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-       follows = etr_aib_follows(sync_port, aib, port);
-       memcpy(sync_port, aib, sizeof(*aib));
-       if (!follows)
-               return -EAGAIN;
+       etr_sync = data;
  
-       /*
-        * Catch all other cpus and make them wait until we have
-        * successfully synced the clock. smp_call_function will
-        * return after all other cpus are in etr_sync_cpu_start.
-        */
-       memset(&etr_sync, 0, sizeof(etr_sync));
-       preempt_disable();
-       smp_call_function(clock_sync_cpu_start, &etr_sync, 0);
-       local_irq_disable();
+       if (xchg(&first, 1) == 1) {
+               /* Slave */
+               clock_sync_cpu(etr_sync);
+               return 0;
+       }
+       /* Wait until all other cpus entered the sync function. */
+       while (atomic_read(&etr_sync->cpus) != 0)
+               cpu_relax();
+       port = etr_sync->etr_port;
+       aib = etr_sync->etr_aib;
+       sync_port = (port == 0) ? &etr_port0 : &etr_port1;
        enable_sync_clock();
  
        /* Set clock to next OTE. */
                delay = (unsigned long long)
                        (aib->edf2.etv - sync_port->edf2.etv) << 32;
                delta = adjust_time(old_clock, clock, delay);
-               etr_sync.fixup_cc = delta;
+               etr_sync->fixup_cc = delta;
                fixup_clock_comparator(delta);
                /* Verify that the clock is properly set. */
                if (!etr_aib_follows(sync_port, aib, port)) {
                        /* Didn't work. */
                        disable_sync_clock(NULL);
-                       etr_sync.in_sync = -EAGAIN;
+                       etr_sync->in_sync = -EAGAIN;
                        rc = -EAGAIN;
                } else {
-                       etr_sync.in_sync = 1;
+                       etr_sync->in_sync = 1;
                        rc = 0;
                }
        } else {
                __ctl_clear_bit(0, 29);
                __ctl_clear_bit(14, 21);
                disable_sync_clock(NULL);
-               etr_sync.in_sync = -EAGAIN;
+               etr_sync->in_sync = -EAGAIN;
                rc = -EAGAIN;
        }
-       local_irq_enable();
-       smp_call_function(clock_sync_cpu_end, NULL, 0);
-       preempt_enable();
+       xchg(&first, 0);
+       return rc;
+ }
+ static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+ {
+       struct clock_sync_data etr_sync;
+       struct etr_aib *sync_port;
+       int follows;
+       int rc;
+       /* Check if the current aib is adjacent to the sync port aib. */
+       sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+       follows = etr_aib_follows(sync_port, aib, port);
+       memcpy(sync_port, aib, sizeof(*aib));
+       if (!follows)
+               return -EAGAIN;
+       memset(&etr_sync, 0, sizeof(etr_sync));
+       etr_sync.etr_aib = aib;
+       etr_sync.etr_port = port;
+       get_online_cpus();
+       atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
+       rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map);
+       put_online_cpus();
        return rc;
  }
  
@@@ -903,7 -967,7 +967,7 @@@ static void etr_update_eacr(struct etr_
  }
  
  /*
-  * ETR tasklet. In this function you'll find the main logic. In
+  * ETR work. In this function you'll find the main logic. In
   * particular this is the only function that calls etr_update_eacr(),
   * it "controls" the etr control register.
   */
@@@ -914,6 -978,9 +978,9 @@@ static void etr_work_fn(struct work_str
        struct etr_aib aib;
        int sync_port;
  
+       /* prevent multiple execution. */
+       mutex_lock(&etr_work_mutex);
        /* Create working copy of etr_eacr. */
        eacr = etr_eacr;
  
                del_timer_sync(&etr_timer);
                etr_update_eacr(eacr);
                clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-               return;
+               goto out_unlock;
        }
  
        /* Store aib to get the current ETR status word. */
            eacr.es || sync_port < 0) {
                etr_update_eacr(eacr);
                etr_set_tolec_timeout(now);
-               return;
+               goto out_unlock;
        }
  
        /*
        etr_update_eacr(eacr);
        set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
        if (now < etr_tolec + (1600000 << 12) ||
-           etr_sync_clock(&aib, sync_port) != 0) {
+           etr_sync_clock_stop(&aib, sync_port) != 0) {
                /* Sync failed. Try again in 1/2 second. */
                eacr.es = 0;
                etr_update_eacr(eacr);
                etr_set_sync_timeout();
        } else
                etr_set_tolec_timeout(now);
+ out_unlock:
+       mutex_unlock(&etr_work_mutex);
  }
  
  /*
@@@ -1125,13 -1194,13 +1194,13 @@@ static ssize_t etr_online_store(struct 
                        return count;   /* Nothing to do. */
                etr_port0_online = value;
                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-               schedule_work(&etr_work);
+               queue_work(time_sync_wq, &etr_work);
        } else {
                if (etr_port1_online == value)
                        return count;   /* Nothing to do. */
                etr_port1_online = value;
                set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-               schedule_work(&etr_work);
+               queue_work(time_sync_wq, &etr_work);
        }
        return count;
  }
@@@ -1332,6 -1401,7 +1401,7 @@@ static struct stp_sstpi stp_info
  static void *stp_page;
  
  static void stp_work_fn(struct work_struct *work);
+ static DEFINE_MUTEX(stp_work_mutex);
  static DECLARE_WORK(stp_work, stp_work_fn);
  
  static int __init early_parse_stp(char *p)
@@@ -1356,7 -1426,8 +1426,8 @@@ static void __init stp_reset(void
        if (rc == 0)
                set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
        else if (stp_online) {
-               printk(KERN_WARNING "Running on non STP capable machine.\n");
+               pr_warning("The real or virtual hardware system does "
+                          "not provide an STP interface\n");
                free_bootmem((unsigned long) stp_page, PAGE_SIZE);
                stp_page = NULL;
                stp_online = 0;
  
  static int __init stp_init(void)
  {
-       if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
-               schedule_work(&stp_work);
+       if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+               return 0;
+       time_init_wq();
+       if (!stp_online)
+               return 0;
+       queue_work(time_sync_wq, &stp_work);
        return 0;
  }
  
@@@ -1383,7 -1458,7 +1458,7 @@@ arch_initcall(stp_init)
  static void stp_timing_alert(struct stp_irq_parm *intparm)
  {
        if (intparm->tsc || intparm->lac || intparm->tcpc)
-               schedule_work(&stp_work);
+               queue_work(time_sync_wq, &stp_work);
  }
  
  /*
@@@ -1397,7 -1472,7 +1472,7 @@@ void stp_sync_check(void
        if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
                return;
        disable_sync_clock(NULL);
-       schedule_work(&stp_work);
+       queue_work(time_sync_wq, &stp_work);
  }
  
  /*
@@@ -1411,46 -1486,34 +1486,34 @@@ void stp_island_check(void
        if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
                return;
        disable_sync_clock(NULL);
-       schedule_work(&stp_work);
+       queue_work(time_sync_wq, &stp_work);
  }
  
- /*
-  * STP tasklet. Check for the STP state and take over the clock
-  * synchronization if the STP clock source is usable.
-  */
- static void stp_work_fn(struct work_struct *work)
+ static int stp_sync_clock(void *data)
  {
-       struct clock_sync_data stp_sync;
+       static int first;
        unsigned long long old_clock, delta;
+       struct clock_sync_data *stp_sync;
        int rc;
  
-       if (!stp_online) {
-               chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
-               return;
-       }
+       stp_sync = data;
  
-       rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
-       if (rc)
-               return;
+       if (xchg(&first, 1) == 1) {
+               /* Slave */
+               clock_sync_cpu(stp_sync);
+               return 0;
+       }
  
-       rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
-       if (rc || stp_info.c == 0)
-               return;
+       /* Wait until all other cpus entered the sync function. */
+       while (atomic_read(&stp_sync->cpus) != 0)
+               cpu_relax();
  
-       /*
-        * Catch all other cpus and make them wait until we have
-        * successfully synced the clock. smp_call_function will
-        * return after all other cpus are in clock_sync_cpu_start.
-        */
-       memset(&stp_sync, 0, sizeof(stp_sync));
-       preempt_disable();
-       smp_call_function(clock_sync_cpu_start, &stp_sync, 0);
-       local_irq_disable();
        enable_sync_clock();
  
        set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
        if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
-               schedule_work(&etr_work);
+               queue_work(time_sync_wq, &etr_work);
  
        rc = 0;
        if (stp_info.todoff[0] || stp_info.todoff[1] ||
        }
        if (rc) {
                disable_sync_clock(NULL);
-               stp_sync.in_sync = -EAGAIN;
+               stp_sync->in_sync = -EAGAIN;
                clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
                if (etr_port0_online || etr_port1_online)
-                       schedule_work(&etr_work);
+                       queue_work(time_sync_wq, &etr_work);
        } else
-               stp_sync.in_sync = 1;
+               stp_sync->in_sync = 1;
+       xchg(&first, 0);
+       return 0;
+ }
+ /*
+  * STP work. Check for the STP state and take over the clock
+  * synchronization if the STP clock source is usable.
+  */
+ static void stp_work_fn(struct work_struct *work)
+ {
+       struct clock_sync_data stp_sync;
+       int rc;
+       /* prevent multiple execution. */
+       mutex_lock(&stp_work_mutex);
+       if (!stp_online) {
+               chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+               goto out_unlock;
+       }
+       rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+       if (rc)
+               goto out_unlock;
+       rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+       if (rc || stp_info.c == 0)
+               goto out_unlock;
+       memset(&stp_sync, 0, sizeof(stp_sync));
+       get_online_cpus();
+       atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+       stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map);
+       put_online_cpus();
  
-       local_irq_enable();
-       smp_call_function(clock_sync_cpu_end, NULL, 0);
-       preempt_enable();
+ out_unlock:
+       mutex_unlock(&stp_work_mutex);
  }
  
  /*
@@@ -1587,7 -1683,7 +1683,7 @@@ static ssize_t stp_online_store(struct 
        if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
                return -EOPNOTSUPP;
        stp_online = value;
-       schedule_work(&stp_work);
+       queue_work(time_sync_wq, &stp_work);
        return count;
  }
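
etr_sync_clock() and stp_sync_clock() above both run on every online CPU under stop_machine(..., &cpu_online_map): the CPU that wins xchg(&first, 1) becomes the master, the others call clock_sync_cpu(), decrement the shared counter and spin until the master publishes in_sync. A stripped-down user-space sketch of that rendezvous, with C11 atomics standing in for the kernel primitives and all names illustrative:

#include <stdatomic.h>

struct sync_data {
	atomic_int cpus;     /* CPUs that have not yet checked in      */
	atomic_int in_sync;  /* 0 = in progress, 1 = done, <0 = failed */
};

static atomic_int first; /* master election flag */

/* Called once per CPU while all CPUs are stopped (stop_machine-like). */
static int sync_clock(void *data)
{
	struct sync_data *sync = data;

	if (atomic_exchange(&first, 1) == 1) {
		/* Slave: report arrival, then wait for the master's verdict. */
		atomic_fetch_sub(&sync->cpus, 1);
		while (atomic_load(&sync->in_sync) == 0)
			;  /* cpu_relax() in the kernel version */
		return 0;
	}

	/* Master: wait until every other CPU has checked in ... */
	while (atomic_load(&sync->cpus) != 0)
		;
	/* ... do the actual clock adjustment here ... */
	atomic_store(&sync->in_sync, 1); /* release the slaves */
	atomic_store(&first, 0);         /* allow a later sync attempt */
	return 0;
}
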
  
diff --combined arch/s390/kernel/topology.c
@@@ -3,6 -3,9 +3,9 @@@
   *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
   */
  
+ #define KMSG_COMPONENT "cpu"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/init.h>
@@@ -12,6 -15,7 +15,7 @@@
  #include <linux/workqueue.h>
  #include <linux/cpu.h>
  #include <linux/smp.h>
+ #include <linux/cpuset.h>
  #include <asm/delay.h>
  #include <asm/s390_ext.h>
  #include <asm/sysinfo.h>
@@@ -57,11 -61,11 +61,11 @@@ struct core_info 
        cpumask_t mask;
  };
  
+ static int topology_enabled;
  static void topology_work_fn(struct work_struct *work);
  static struct tl_info *tl_info;
  static struct core_info core_info;
  static int machine_has_topology;
- static int machine_has_topology_irq;
  static struct timer_list topology_timer;
  static void set_topology_timer(void);
  static DECLARE_WORK(topology_work, topology_work_fn);
@@@ -77,8 -81,8 +81,8 @@@ cpumask_t cpu_coregroup_map(unsigned in
        cpumask_t mask;
  
        cpus_clear(mask);
-       if (!machine_has_topology)
-               return cpu_present_map;
+       if (!topology_enabled || !machine_has_topology)
+               return cpu_possible_map;
        spin_lock_irqsave(&topology_lock, flags);
        while (core) {
                if (cpu_isset(cpu, core->mask)) {
        return mask;
  }
  
 +const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
 +{
 +      return &cpu_core_map[cpu];
 +}
 +
  static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
  {
        unsigned int cpu;
@@@ -173,7 -172,7 +177,7 @@@ static void topology_update_polarizatio
        int cpu;
  
        mutex_lock(&smp_cpu_state_mutex);
-       for_each_present_cpu(cpu)
+       for_each_possible_cpu(cpu)
                smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
        mutex_unlock(&smp_cpu_state_mutex);
  }
@@@ -204,7 -203,7 +208,7 @@@ int topology_set_cpu_management(int fc
                rc = ptf(PTF_HORIZONTAL);
        if (rc)
                return -EBUSY;
-       for_each_present_cpu(cpu)
+       for_each_possible_cpu(cpu)
                smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
        return rc;
  }
@@@ -213,11 -212,11 +217,11 @@@ static void update_cpu_core_map(void
  {
        int cpu;
  
-       for_each_present_cpu(cpu)
+       for_each_possible_cpu(cpu)
                cpu_core_map[cpu] = cpu_coregroup_map(cpu);
  }
  
- void arch_update_cpu_topology(void)
+ int arch_update_cpu_topology(void)
  {
        struct tl_info *info = tl_info;
        struct sys_device *sysdev;
        if (!machine_has_topology) {
                update_cpu_core_map();
                topology_update_polarization_simple();
-               return;
+               return 0;
        }
        stsi(info, 15, 1, 2);
        tl_to_cores(info);
                sysdev = get_cpu_sysdev(cpu);
                kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
        }
+       return 1;
  }
  
  static void topology_work_fn(struct work_struct *work)
  {
-       arch_reinit_sched_domains();
+       rebuild_sched_domains();
  }
  
  void topology_schedule_update(void)
@@@ -262,10 -262,14 +267,14 @@@ static void set_topology_timer(void
        add_timer(&topology_timer);
  }
  
- static void topology_interrupt(__u16 code)
+ static int __init early_parse_topology(char *p)
  {
-       schedule_work(&topology_work);
+       if (strncmp(p, "on", 2))
+               return 0;
+       topology_enabled = 1;
+       return 0;
  }
+ early_param("topology", early_parse_topology);
  
  static int __init init_topology_update(void)
  {
                goto out;
        }
        init_timer_deferrable(&topology_timer);
-       if (machine_has_topology_irq) {
-               rc = register_external_interrupt(0x2005, topology_interrupt);
-               if (rc)
-                       goto out;
-               ctl_set_bit(0, 8);
-       }
-       else
-               set_topology_timer();
+       set_topology_timer();
  out:
        update_cpu_core_map();
        return rc;
@@@ -305,9 -302,6 +307,6 @@@ void __init s390_init_cpu_topology(void
                return;
        machine_has_topology = 1;
  
-       if (facility_bits & (1ULL << 51))
-               machine_has_topology_irq = 1;
        tl_info = alloc_bootmem_pages(PAGE_SIZE);
        info = tl_info;
        stsi(info, 15, 1, 2);
        for (i = 0; i < info->mnest - 2; i++)
                nr_cores *= info->mag[NR_MAG - 3 - i];
  
-       printk(KERN_INFO "CPU topology:");
+       pr_info("The CPU configuration topology of the machine is:");
        for (i = 0; i < NR_MAG; i++)
                printk(" %d", info->mag[i]);
        printk(" / %d\n", info->mnest);
        return;
  error:
        machine_has_topology = 0;
-       machine_has_topology_irq = 0;
  }
diff --combined arch/x86/include/asm/pci.h
@@@ -19,6 -19,8 +19,8 @@@ struct pci_sysdata 
  };
  
  extern int pci_routeirq;
+ extern int noioapicquirk;
+ extern int noioapicreroute;
  
  /* scan a bus after allocating a pci_sysdata for it */
  extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
@@@ -98,9 -100,9 +100,9 @@@ static inline void early_quirks(void) 
  
  #ifdef CONFIG_NUMA
  /* Returns the node based on pci bus */
 -static inline int __pcibus_to_node(struct pci_bus *bus)
 +static inline int __pcibus_to_node(const struct pci_bus *bus)
  {
 -      struct pci_sysdata *sd = bus->sysdata;
 +      const struct pci_sysdata *sd = bus->sysdata;
  
        return sd->node;
  }
@@@ -109,12 -111,6 +111,12 @@@ static inline cpumask_t __pcibus_to_cpu
  {
        return node_to_cpumask(__pcibus_to_node(bus));
  }
 +
 +static inline const struct cpumask *
 +cpumask_of_pcibus(const struct pci_bus *bus)
 +{
 +      return cpumask_of_node(__pcibus_to_node(bus));
 +}
  #endif
  
  #endif /* _ASM_X86_PCI_H */
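
Several hunks in this merge (xics_set_affinity, mpic_set_affinity, lapic_timer_broadcast, the clockevent ->cpumask fields) switch from passing a cpumask_t by value to passing const struct cpumask *, so no NR_CPUS-sized copy lands on the stack and helpers like cpumask_of() and cpu_online_mask can hand out shared const masks. A before/after sketch of the idiom, modelled on the mpic_set_affinity change above; the function names here are made up:

#include <linux/cpumask.h>

/* Old style: the whole cpumask_t is copied into the argument. */
static void my_set_affinity_old(unsigned int irq, cpumask_t mask)
{
	cpumask_t tmp;

	cpus_and(tmp, mask, cpu_online_map);
	/* ... program the interrupt controller from tmp ... */
}

/* New style: only a pointer travels; the callee treats the mask as const. */
static void my_set_affinity_new(unsigned int irq, const struct cpumask *mask)
{
	cpumask_t tmp;

	cpumask_and(&tmp, mask, cpu_online_mask);
	/* ... program the interrupt controller from &tmp ... */
}
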
diff --combined arch/x86/kernel/apic.c
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/module.h>
  #include <linux/dmi.h>
  #include <linux/dmar.h>
+ #include <linux/ftrace.h>
  
  #include <asm/atomic.h>
  #include <asm/smp.h>
@@@ -141,7 -142,7 +142,7 @@@ static int lapic_next_event(unsigned lo
                            struct clock_event_device *evt);
  static void lapic_timer_setup(enum clock_event_mode mode,
                              struct clock_event_device *evt);
 -static void lapic_timer_broadcast(cpumask_t mask);
 +static void lapic_timer_broadcast(const struct cpumask *mask);
  static void apic_pm_activate(void);
  
  /*
@@@ -441,6 -442,7 +442,7 @@@ static void lapic_timer_setup(enum cloc
                v = apic_read(APIC_LVTT);
                v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
                apic_write(APIC_LVTT, v);
+               apic_write(APIC_TMICT, 0xffffffff);
                break;
        case CLOCK_EVT_MODE_RESUME:
                /* Nothing to do here */
  /*
   * Local APIC timer broadcast function
   */
 -static void lapic_timer_broadcast(cpumask_t mask)
 +static void lapic_timer_broadcast(const struct cpumask *mask)
  {
  #ifdef CONFIG_SMP
 -      send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 +      send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
  #endif
  }
  
@@@ -469,7 -471,7 +471,7 @@@ static void __cpuinit setup_APIC_timer(
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
  
        memcpy(levt, &lapic_clockevent, sizeof(*levt));
 -      levt->cpumask = cpumask_of_cpu(smp_processor_id());
 +      levt->cpumask = cpumask_of(smp_processor_id());
  
        clockevents_register_device(levt);
  }
@@@ -559,13 -561,13 +561,13 @@@ static int __init calibrate_by_pmtimer(
        } else {
                res = (((u64)deltapm) *  mult) >> 22;
                do_div(res, 1000000);
-               printk(KERN_WARNING "APIC calibration not consistent "
+               pr_warning("APIC calibration not consistent "
                        "with PM Timer: %ldms instead of 100ms\n",
                        (long)res);
                /* Correct the lapic counter value */
                res = (((u64)(*delta)) * pm_100ms);
                do_div(res, deltapm);
-               printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+               pr_info("APIC delta adjusted to PM-Timer: "
                        "%lu (%ld)\n", (unsigned long)res, *delta);
                *delta = (long)res;
        }
@@@ -645,8 -647,7 +647,7 @@@ static int __init calibrate_APIC_clock(
         */
        if (calibration_result < (1000000 / HZ)) {
                local_irq_enable();
-               printk(KERN_WARNING
-                      "APIC frequency too slow, disabling apic timer\n");
+               pr_warning("APIC frequency too slow, disabling apic timer\n");
                return -1;
        }
  
                while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
                        cpu_relax();
  
                /* Stop the lapic timer */
                lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
  
-               local_irq_enable();
                /* Jiffies delta */
                deltaj = lapic_cal_j2 - lapic_cal_j1;
                apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
                local_irq_enable();
  
        if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-               printk(KERN_WARNING
-                      "APIC timer disabled due to verification failure.\n");
+               pr_warning("APIC timer disabled due to verification failure.\n");
                        return -1;
        }
  
@@@ -714,7 -710,7 +710,7 @@@ void __init setup_boot_APIC_clock(void
         * broadcast mechanism is used. On UP systems simply ignore it.
         */
        if (disable_apic_timer) {
-               printk(KERN_INFO "Disabling APIC timer\n");
+               pr_info("Disabling APIC timer\n");
                /* No broadcast on UP ! */
                if (num_possible_cpus() > 1) {
                        lapic_clockevent.mult = 1;
        if (nmi_watchdog != NMI_IO_APIC)
                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
        else
-               printk(KERN_WARNING "APIC timer registered as dummy,"
+               pr_warning("APIC timer registered as dummy,"
                        " due to nmi_watchdog=%d!\n", nmi_watchdog);
  
        /* Setup the lapic or request the broadcast */
@@@ -773,8 -769,7 +769,7 @@@ static void local_apic_timer_interrupt(
         * spurious.
         */
        if (!evt->event_handler) {
-               printk(KERN_WARNING
-                      "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+               pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
                /* Switch it off */
                lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
                return;
        /*
         * the NMI deadlock-detector uses this.
         */
- #ifdef CONFIG_X86_64
-       add_pda(apic_timer_irqs, 1);
- #else
-       per_cpu(irq_stat, cpu).apic_timer_irqs++;
- #endif
+       inc_irq_stat(apic_timer_irqs);
  
        evt->event_handler(evt);
  }
   * [ if a single-CPU system runs an SMP kernel then we call the local
   *   interrupt as well. Thus we cannot inline the local irq ... ]
   */
- void smp_apic_timer_interrupt(struct pt_regs *regs)
+ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
  {
        struct pt_regs *old_regs = set_irq_regs(regs);
  
         * Besides, if we don't timer interrupts ignore the global
         * interrupt lock, which is the WrongThing (tm) to do.
         */
- #ifdef CONFIG_X86_64
        exit_idle();
- #endif
        irq_enter();
        local_apic_timer_interrupt();
        irq_exit();
@@@ -1093,7 -1082,7 +1082,7 @@@ static void __cpuinit lapic_setup_esr(v
        unsigned int oldvalue, value, maxlvt;
  
        if (!lapic_is_integrated()) {
-               printk(KERN_INFO "No ESR for 82489DX.\n");
+               pr_info("No ESR for 82489DX.\n");
                return;
        }
  
                 * ESR disabled - we can't do anything useful with the
                 * errors anyway - mbligh
                 */
-               printk(KERN_INFO "Leaving ESR disabled.\n");
+               pr_info("Leaving ESR disabled.\n");
                return;
        }
  
@@@ -1298,7 -1287,7 +1287,7 @@@ void check_x2apic(void
        rdmsr(MSR_IA32_APICBASE, msr, msr2);
  
        if (msr & X2APIC_ENABLE) {
-               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+               pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
                x2apic_preenabled = x2apic = 1;
                apic_ops = &x2apic_ops;
        }
@@@ -1310,7 -1299,7 +1299,7 @@@ void enable_x2apic(void
  
        rdmsr(MSR_IA32_APICBASE, msr, msr2);
        if (!(msr & X2APIC_ENABLE)) {
-               printk("Enabling x2apic\n");
+               pr_info("Enabling x2apic\n");
                wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
        }
  }
@@@ -1325,9 -1314,8 +1314,8 @@@ void __init enable_IR_x2apic(void
                return;
  
        if (!x2apic_preenabled && disable_x2apic) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of nox2apic\n");
+               pr_info("Skipped enabling x2apic and Interrupt-remapping "
+                       "because of nox2apic\n");
                return;
        }
  
                panic("Bios already enabled x2apic, can't enforce nox2apic");
  
        if (!x2apic_preenabled && skip_ioapic_setup) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of skipping io-apic setup\n");
+               pr_info("Skipped enabling x2apic and Interrupt-remapping "
+                       "because of skipping io-apic setup\n");
                return;
        }
  
        ret = dmar_table_init();
        if (ret) {
-               printk(KERN_INFO
-                      "dmar_table_init() failed with %d:\n", ret);
+               pr_info("dmar_table_init() failed with %d:\n", ret);
  
                if (x2apic_preenabled)
                        panic("x2apic enabled by bios. But IR enabling failed");
                else
-                       printk(KERN_INFO
-                              "Not enabling x2apic,Intr-remapping\n");
+                       pr_info("Not enabling x2apic,Intr-remapping\n");
                return;
        }
  
  
        ret = save_mask_IO_APIC_setup();
        if (ret) {
-               printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+               pr_info("Saving IO-APIC state failed: %d\n", ret);
                goto end;
        }
  
  
        if (!ret) {
                if (!x2apic_preenabled)
-                       printk(KERN_INFO
-                              "Enabled x2apic and interrupt-remapping\n");
+                       pr_info("Enabled x2apic and interrupt-remapping\n");
                else
-                       printk(KERN_INFO
-                              "Enabled Interrupt-remapping\n");
+                       pr_info("Enabled Interrupt-remapping\n");
        } else
-               printk(KERN_ERR
-                      "Failed to enable Interrupt-remapping and x2apic\n");
+               pr_err("Failed to enable Interrupt-remapping and x2apic\n");
  #else
        if (!cpu_has_x2apic)
                return;
                panic("x2apic enabled prior OS handover,"
                      " enable CONFIG_INTR_REMAP");
  
-       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-              " and x2apic\n");
+       pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+               " and x2apic\n");
  #endif
  
        return;
  static int __init detect_init_APIC(void)
  {
        if (!cpu_has_apic) {
-               printk(KERN_INFO "No local APIC present\n");
+               pr_info("No local APIC present\n");
                return -1;
        }
  
@@@ -1469,8 -1451,8 +1451,8 @@@ static int __init detect_init_APIC(void
                 * "lapic" specified.
                 */
                if (!force_enable_local_apic) {
-                       printk(KERN_INFO "Local APIC disabled by BIOS -- "
-                              "you can enable it with \"lapic\"\n");
+                       pr_info("Local APIC disabled by BIOS -- "
+                               "you can enable it with \"lapic\"\n");
                        return -1;
                }
                /*
                 */
                rdmsr(MSR_IA32_APICBASE, l, h);
                if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-                       printk(KERN_INFO
-                              "Local APIC disabled by BIOS -- reenabling.\n");
+                       pr_info("Local APIC disabled by BIOS -- reenabling.\n");
                        l &= ~MSR_IA32_APICBASE_BASE;
                        l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
                        wrmsr(MSR_IA32_APICBASE, l, h);
         */
        features = cpuid_edx(1);
        if (!(features & (1 << X86_FEATURE_APIC))) {
-               printk(KERN_WARNING "Could not enable APIC!\n");
+               pr_warning("Could not enable APIC!\n");
                return -1;
        }
        set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
        if (l & MSR_IA32_APICBASE_ENABLE)
                mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
  
-       printk(KERN_INFO "Found and enabled local APIC!\n");
+       pr_info("Found and enabled local APIC!\n");
  
        apic_pm_activate();
  
        return 0;
  
  no_apic:
-       printk(KERN_INFO "No local APIC present or hardware disabled\n");
+       pr_info("No local APIC present or hardware disabled\n");
        return -1;
  }
  #endif
@@@ -1588,12 -1569,12 +1569,12 @@@ int __init APIC_init_uniprocessor(void
  {
  #ifdef CONFIG_X86_64
        if (disable_apic) {
-               printk(KERN_INFO "Apic disabled\n");
+               pr_info("Apic disabled\n");
                return -1;
        }
        if (!cpu_has_apic) {
                disable_apic = 1;
-               printk(KERN_INFO "Apic disabled by BIOS\n");
+               pr_info("Apic disabled by BIOS\n");
                return -1;
        }
  #else
         */
        if (!cpu_has_apic &&
            APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-               printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
-                      boot_cpu_physical_apicid);
+               pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+                       boot_cpu_physical_apicid);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
                return -1;
        }
@@@ -1682,9 -1663,7 +1663,7 @@@ void smp_spurious_interrupt(struct pt_r
  {
        u32 v;
  
- #ifdef CONFIG_X86_64
        exit_idle();
- #endif
        irq_enter();
        /*
         * Check if this really is a spurious interrupt and ACK it
        if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
                ack_APIC_irq();
  
- #ifdef CONFIG_X86_64
-       add_pda(irq_spurious_count, 1);
- #else
+       inc_irq_stat(irq_spurious_count);
        /* see sw-dev-man vol 3, chapter 7.4.13.5 */
-       printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
-              "should never happen.\n", smp_processor_id());
-       __get_cpu_var(irq_stat).irq_spurious_count++;
- #endif
+       pr_info("spurious APIC interrupt on CPU#%d, "
+               "should never happen.\n", smp_processor_id());
        irq_exit();
  }
  
@@@ -1713,9 -1689,7 +1689,7 @@@ void smp_error_interrupt(struct pt_reg
  {
        u32 v, v1;
  
- #ifdef CONFIG_X86_64
        exit_idle();
- #endif
        irq_enter();
        /* First tickle the hardware, only then report what went on. -- REW */
        v = apic_read(APIC_ESR);
        ack_APIC_irq();
        atomic_inc(&irq_err_count);
  
-       /* Here is what the APIC error bits mean:
-          0: Send CS error
-          1: Receive CS error
-          2: Send accept error
-          3: Receive accept error
-          4: Reserved
-          5: Send illegal vector
-          6: Received illegal vector
-          7: Illegal register address
-       */
-       printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+       /*
+        * Here is what the APIC error bits mean:
+        * 0: Send CS error
+        * 1: Receive CS error
+        * 2: Send accept error
+        * 3: Receive accept error
+        * 4: Reserved
+        * 5: Send illegal vector
+        * 6: Received illegal vector
+        * 7: Illegal register address
+        */
+       pr_debug("APIC error on CPU%d: %02x(%02x)\n",
                smp_processor_id(), v , v1);
        irq_exit();
  }
@@@ -1838,15 -1813,15 +1813,15 @@@ void __cpuinit generic_processor_info(i
         * Validate version
         */
        if (version == 0x0) {
-               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-                               "fixing up to 0x10. (tell your hw vendor)\n",
-                               version);
+               pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+                       "fixing up to 0x10. (tell your hw vendor)\n",
+                       version);
                version = 0x10;
        }
        apic_version[apicid] = version;
  
        if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+               pr_warning("WARNING: NR_CPUS limit of %i reached."
                        "  Processor ignored.\n", NR_CPUS);
                return;
        }
@@@ -2209,7 -2184,7 +2184,7 @@@ static int __init apic_set_verbosity(ch
        else if (strcmp("verbose", arg) == 0)
                apic_verbosity = APIC_VERBOSE;
        else {
-               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+               pr_warning("APIC Verbosity level %s not recognised"
                        " use apic=verbose or apic=debug\n", arg);
                return -EINVAL;
        }
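 
  The apic.c hunks above mechanically replace printk(KERN_INFO/KERN_WARNING/
  KERN_ERR ...) with the pr_info()/pr_warning()/pr_err() helpers. A minimal
  sketch of what those wrappers amount to (the real macros live in
  <linux/kernel.h>; this is an approximation, not the kernel's exact text):
 
        #define pr_info(fmt, ...)       printk(KERN_INFO fmt, ##__VA_ARGS__)
        #define pr_warning(fmt, ...)    printk(KERN_WARNING fmt, ##__VA_ARGS__)
        #define pr_err(fmt, ...)        printk(KERN_ERR fmt, ##__VA_ARGS__)
 
  The log level is unchanged; only the call sites get shorter and keep their
  format strings on one line more easily.
 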
diff --combined arch/x86/kernel/cpu/intel_cacheinfo.c
@@@ -626,8 -626,8 +626,8 @@@ static ssize_t show_shared_cpu_map_func
                cpumask_t *mask = &this_leaf->shared_cpu_map;
  
                n = type?
 -                      cpulist_scnprintf(buf, len-2, *mask):
 -                      cpumask_scnprintf(buf, len-2, *mask);
 +                      cpulist_scnprintf(buf, len-2, mask) :
 +                      cpumask_scnprintf(buf, len-2, mask);
                buf[n++] = '\n';
                buf[n] = '\0';
        }
@@@ -644,20 -644,17 +644,17 @@@ static inline ssize_t show_shared_cpu_l
        return show_shared_cpu_map_func(leaf, 1, buf);
  }
  
- static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
-       switch(this_leaf->eax.split.type) {
-           case CACHE_TYPE_DATA:
+ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+ {
+       switch (this_leaf->eax.split.type) {
+       case CACHE_TYPE_DATA:
                return sprintf(buf, "Data\n");
-               break;
-           case CACHE_TYPE_INST:
+       case CACHE_TYPE_INST:
                return sprintf(buf, "Instruction\n");
-               break;
-           case CACHE_TYPE_UNIFIED:
+       case CACHE_TYPE_UNIFIED:
                return sprintf(buf, "Unified\n");
-               break;
-           default:
+       default:
                return sprintf(buf, "Unknown\n");
-               break;
        }
  }
  
diff --combined arch/x86/kernel/hpet.c
@@@ -33,7 -33,9 +33,9 @@@
   * HPET address is set in acpi/boot.c, when an ACPI entry exists
   */
  unsigned long                         hpet_address;
- unsigned long                         hpet_num_timers;
+ #ifdef CONFIG_PCI_MSI
+ static unsigned long                  hpet_num_timers;
+ #endif
  static void __iomem                   *hpet_virt_address;
  
  struct hpet_dev {
@@@ -246,7 -248,7 +248,7 @@@ static void hpet_legacy_clockevent_regi
         * Start hpet with the boot cpu mask and make it
         * global after the IO_APIC has been initialized.
         */
 -      hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
 +      hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
        clockevents_register_device(&hpet_clockevent);
        global_clock_event = &hpet_clockevent;
        printk(KERN_DEBUG "hpet clockevent registered\n");
@@@ -301,7 -303,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev
                        struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
                        hpet_setup_msi_irq(hdev->irq);
                        disable_irq(hdev->irq);
 -                      irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
 +                      irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
                        enable_irq(hdev->irq);
                }
                break;
@@@ -449,7 -451,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d
                return -1;
  
        disable_irq(dev->irq);
 -      irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
 +      irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
        enable_irq(dev->irq);
  
        printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@@ -500,7 -502,7 +502,7 @@@ static void init_one_hpet_msi_clockeven
        /* 5 usec minimum reprogramming delta. */
        evt->min_delta_ns = 5000;
  
 -      evt->cpumask = cpumask_of_cpu(hdev->cpu);
 +      evt->cpumask = cpumask_of(hdev->cpu);
        clockevents_register_device(evt);
  }
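 
  The hpet.c hunks switch from cpumask_of_cpu(cpu), which builds an NR_CPUS-bit
  cpumask_t and returns it by value, to cpumask_of(cpu), which returns a
  const struct cpumask * to a constant single-bit mask. A hedged sketch of a
  converted call site (example_bind_clockevent is a hypothetical name; the
  helpers are assumed to match <linux/cpumask.h>):
 
        /* Point a clock event device at one CPU without copying a cpumask. */
        static void example_bind_clockevent(struct clock_event_device *evt, int cpu)
        {
                evt->cpumask = cpumask_of(cpu); /* was: cpumask_of_cpu(cpu) by value */
        }
 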
  
diff --combined arch/x86/kernel/io_apic.c
@@@ -361,8 -361,7 +361,8 @@@ static void __target_IO_APIC_irq(unsign
  
  static int assign_irq_vector(int irq, cpumask_t mask);
  
 -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 +static void set_ioapic_affinity_irq(unsigned int irq,
 +                                  const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        unsigned long flags;
        cpumask_t tmp;
        struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
        cfg = irq_cfg(irq);
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
        /*
         * Only the high 8 bits are valid.
        desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
        __target_IO_APIC_irq(irq, dest, cfg->vector);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  #endif /* CONFIG_SMP */
@@@ -2189,7 -2189,7 +2189,7 @@@ static void ir_irq_migration(struct wor
                                continue;
                        }
  
 -                      desc->chip->set_affinity(irq, desc->pending_mask);
 +                      desc->chip->set_affinity(irq, &desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
  /*
   * Migrates the IRQ destination in the process context.
   */
 -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 +static void set_ir_ioapic_affinity_irq(unsigned int irq,
 +                                     const struct cpumask *mask)
  {
        struct irq_desc *desc = irq_to_desc(irq);
  
        if (desc->status & IRQ_LEVEL) {
                desc->status |= IRQ_MOVE_PENDING;
 -              desc->pending_mask = mask;
 +              cpumask_copy(&desc->pending_mask, mask);
                migrate_irq_remapped_level(irq);
                return;
        }
  
 -      migrate_ioapic_irq(irq, mask);
 +      migrate_ioapic_irq(irq, *mask);
  }
  #endif
  
  asmlinkage void smp_irq_move_cleanup_interrupt(void)
  {
        unsigned vector, me;
        ack_APIC_irq();
- #ifdef CONFIG_X86_64
        exit_idle();
- #endif
        irq_enter();
  
        me = smp_processor_id();
@@@ -3028,7 -3026,7 +3027,7 @@@ static int msi_compose_msg(struct pci_d
  }
  
  #ifdef CONFIG_SMP
 -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        struct msi_msg msg;
        cpumask_t tmp;
        struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
        cfg = irq_cfg(irq);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        read_msi_msg(irq, &msg);
  
        write_msi_msg(irq, &msg);
        desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  
  #ifdef CONFIG_INTR_REMAP
   * Migrate the MSI irq to another cpumask. This migration is
   * done in the process context using interrupt-remapping hardware.
   */
 -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void ir_set_msi_irq_affinity(unsigned int irq,
 +                                  const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        unsigned int dest;
        struct irte irte;
        struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
        if (get_irte(irq, &irte))
                return;
  
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
        cfg = irq_cfg(irq);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        irte.vector = cfg->vector;
        }
  
        desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  #endif /* CONFIG_SMP */
@@@ -3308,7 -3307,7 +3307,7 @@@ void arch_teardown_msi_irq(unsigned in
  
  #ifdef CONFIG_DMAR
  #ifdef CONFIG_SMP
 -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        struct msi_msg msg;
        cpumask_t tmp;
        struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
        cfg = irq_cfg(irq);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        dmar_msi_read(irq, &msg);
  
        dmar_msi_write(irq, &msg);
        desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
@@@ -3368,7 -3368,7 +3367,7 @@@ int arch_setup_dmar_msi(unsigned int ir
  #ifdef CONFIG_HPET_TIMER
  
  #ifdef CONFIG_SMP
 -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        struct irq_desc *desc;
        unsigned int dest;
        cpumask_t tmp;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
        cfg = irq_cfg(irq);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        hpet_msi_read(irq, &msg);
  
        hpet_msi_write(irq, &msg);
        desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
@@@ -3449,26 -3450,27 +3448,26 @@@ static void target_ht_irq(unsigned int 
        write_ht_irq_msg(irq, &msg);
  }
  
 -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
        struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
 -      if (assign_irq_vector(irq, mask))
 +      if (assign_irq_vector(irq, *mask))
                return;
  
        cfg = irq_cfg(irq);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        target_ht_irq(irq, dest, cfg->vector);
        desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  
@@@ -3791,10 -3793,10 +3790,10 @@@ void __init setup_ioapic_dest(void
  
  #ifdef CONFIG_INTR_REMAP
                        if (intr_remapping_enabled)
 -                              set_ir_ioapic_affinity_irq(irq, mask);
 +                              set_ir_ioapic_affinity_irq(irq, &mask);
                        else
  #endif
 -                              set_ioapic_affinity_irq(irq, mask);
 +                              set_ioapic_affinity_irq(irq, &mask);
                }
  
        }
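 
  Every ->set_affinity() handler in io_apic.c now takes const struct cpumask *
  instead of a cpumask_t by value, and the open-coded cpus_and()/cpus_empty()
  online test becomes cpumask_intersects(). The conversions above (IO-APIC,
  MSI, DMAR, HPET, HT) all follow the same shape; a condensed sketch with the
  hardware reprogramming elided (example_set_affinity is a hypothetical name):
 
        static void example_set_affinity(unsigned int irq, const struct cpumask *mask)
        {
                struct irq_cfg *cfg;
                struct irq_desc *desc;
                cpumask_t tmp;
 
                if (!cpumask_intersects(mask, cpu_online_mask))
                        return;                         /* nothing requested is online */
                if (assign_irq_vector(irq, *mask))
                        return;
 
                cfg = irq_cfg(irq);
                cpumask_and(&tmp, &cfg->domain, mask);
                /* ... program the destination from cpu_mask_to_apicid(tmp) ... */
 
                desc = irq_to_desc(irq);
                cpumask_copy(&desc->affinity, mask);    /* copy, not struct assignment */
        }
 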
diff --combined arch/x86/kernel/irq_64.c
  #include <linux/seq_file.h>
  #include <linux/module.h>
  #include <linux/delay.h>
+ #include <linux/ftrace.h>
  #include <asm/uaccess.h>
  #include <asm/io_apic.h>
  #include <asm/idle.h>
  #include <asm/smp.h>
  
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
  /*
   * Probabilistic stack overflow check:
   *
   */
  static inline void stack_overflow_check(struct pt_regs *regs)
  {
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
        u64 curbase = (u64)task_stack_page(current);
-       static unsigned long warned = -60*HZ;
-       if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
-           regs->sp <  curbase + sizeof(struct thread_info) + 128 &&
-           time_after(jiffies, warned + 60*HZ)) {
-               printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
-                      current->comm, curbase, regs->sp);
-               show_stack(NULL,NULL);
-               warned = jiffies;
-       }
- }
+       WARN_ONCE(regs->sp >= curbase &&
+                 regs->sp <= curbase + THREAD_SIZE &&
+                 regs->sp <  curbase + sizeof(struct thread_info) +
+                                       sizeof(struct pt_regs) + 128,
+                 "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+                       current->comm, curbase, regs->sp);
  #endif
+ }
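 
  The rewritten stack_overflow_check() drops the hand-rolled once-a-minute
  jiffies throttle and relies on WARN_ONCE(), which evaluates its condition and
  prints the message plus a backtrace only the first time it triggers. A small
  hedged sketch of that behaviour (example_check is a hypothetical function):
 
        static void example_check(unsigned long sp, unsigned long stack_base)
        {
                /* Prints (with a stack trace) at most once per boot. */
                WARN_ONCE(sp < stack_base + 128,
                          "stack pointer %lx within 128 bytes of base %lx\n",
                          sp, stack_base);
        }
 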
  
  /*
   * do_IRQ handles all normal device IRQ's (the special
   * SMP cross-CPU interrupts have their own specific
   * handlers).
   */
- asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
  {
        struct pt_regs *old_regs = set_irq_regs(regs);
        struct irq_desc *desc;
@@@ -60,9 -59,7 +59,7 @@@
        irq_enter();
        irq = __get_cpu_var(vector_irq)[vector];
  
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
        stack_overflow_check(regs);
- #endif
  
        desc = irq_to_desc(irq);
        if (likely(desc))
@@@ -116,7 -113,7 +113,7 @@@ void fixup_irqs(cpumask_t map
                        desc->chip->mask(irq);
  
                if (desc->chip->set_affinity)
 -                      desc->chip->set_affinity(irq, mask);
 +                      desc->chip->set_affinity(irq, &mask);
                else if (!(warned++))
                        set_affinity = 0;
  
diff --combined arch/x86/kernel/smpboot.c
@@@ -62,6 -62,7 +62,7 @@@
  #include <asm/mtrr.h>
  #include <asm/vmi.h>
  #include <asm/genapic.h>
+ #include <asm/setup.h>
  #include <linux/mc146818rtc.h>
  
  #include <mach_apic.h>
@@@ -101,8 -102,14 +102,8 @@@ EXPORT_SYMBOL(smp_num_siblings)
  /* Last level cache ID of each logical CPU */
  DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
  
 -/* bitmap of online cpus */
 -cpumask_t cpu_online_map __read_mostly;
 -EXPORT_SYMBOL(cpu_online_map);
 -
  cpumask_t cpu_callin_map;
  cpumask_t cpu_callout_map;
 -cpumask_t cpu_possible_map;
 -EXPORT_SYMBOL(cpu_possible_map);
  
  /* representing HT siblings of each logical CPU */
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@@ -281,16 -288,14 +282,14 @@@ static int __cpuinitdata unsafe_smp
  /*
   * Activate a secondary processor.
   */
- static void __cpuinit start_secondary(void *unused)
+ notrace static void __cpuinit start_secondary(void *unused)
  {
        /*
         * Don't put *anything* before cpu_init(), SMP booting is too
         * fragile that we want to limit the things done here to the
         * most necessary things.
         */
- #ifdef CONFIG_VMI
        vmi_bringup();
- #endif
        cpu_init();
        preempt_disable();
        smp_callin();
@@@ -497,7 -502,7 +496,7 @@@ void __cpuinit set_cpu_sibling_map(int 
  }
  
  /* maps the cpu to the sched domain representing multi-core */
 -cpumask_t cpu_coregroup_map(int cpu)
 +const struct cpumask *cpu_coregroup_mask(int cpu)
  {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        /*
         * And for power savings, we return cpu_core_map
         */
        if (sched_mc_power_savings || sched_smt_power_savings)
 -              return per_cpu(cpu_core_map, cpu);
 +              return &per_cpu(cpu_core_map, cpu);
        else
 -              return c->llc_shared_map;
 +              return &c->llc_shared_map;
 +}
 +
 +cpumask_t cpu_coregroup_map(int cpu)
 +{
 +      return *cpu_coregroup_mask(cpu);
  }
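 
  cpu_coregroup_mask() is the new pointer-returning interface; the old
  cpu_coregroup_map() survives above only as a by-value wrapper so unconverted
  callers still build. The caller-side change, as performed in the
  kernel/sched.c hunks later in this diff:
 
        /*
         * Old:  *mask = cpu_coregroup_map(cpu);     - NR_CPUS-bit struct returned
         *                                             and copied by value
         * New:  *mask = *cpu_coregroup_mask(cpu);   - dereference a pointer to the
         *                                             shared per-cpu mask
         *
         * Returning a pointer avoids building a full cpumask on the stack for
         * every call.
         */
 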
  
  static void impress_friends(void)
        pr_debug("Before bogocount - setting activated=1.\n");
  }
  
- static inline void __inquire_remote_apic(int apicid)
+ void __inquire_remote_apic(int apicid)
  {
        unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
        char *names[] = { "ID", "VERSION", "SPIV" };
        }
  }
  
- #ifdef WAKE_SECONDARY_VIA_NMI
  /*
   * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
   * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
   * won't ... remember to clear down the APIC, etc later.
   */
- static int __devinit
- wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
  {
        unsigned long send_status, accept_status = 0;
        int maxlvt;
         * Give the other CPU some time to accept the IPI.
         */
        udelay(200);
-       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
                maxlvt = lapic_get_maxlvt();
                if (maxlvt > 3)                 /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
  
        return (send_status | accept_status);
  }
- #endif        /* WAKE_SECONDARY_VIA_NMI */
  
- #ifdef WAKE_SECONDARY_VIA_INIT
- static int __devinit
- wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
  {
        unsigned long send_status, accept_status = 0;
        int maxlvt, num_starts, j;
  
        return (send_status | accept_status);
  }
- #endif        /* WAKE_SECONDARY_VIA_INIT */
  
  struct create_idle {
        struct work_struct work;
@@@ -1085,8 -1081,10 +1080,10 @@@ static int __init smp_sanity_check(unsi
  #endif
  
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
-               printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
-                                   "by the BIOS.\n", hard_smp_processor_id());
+               printk(KERN_WARNING
+                       "weird, boot CPU (#%d) not listed by the BIOS.\n",
+                       hard_smp_processor_id());
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }
  
diff --combined arch/x86/lguest/boot.c
@@@ -590,7 -590,8 +590,8 @@@ static void __init lguest_init_IRQ(void
                 * a straightforward 1 to 1 mapping, so force that here. */
                __get_cpu_var(vector_irq)[vector] = i;
                if (vector != SYSCALL_VECTOR) {
-                       set_intr_gate(vector, interrupt[vector]);
+                       set_intr_gate(vector,
+                                     interrupt[vector-FIRST_EXTERNAL_VECTOR]);
                        set_irq_chip_and_handler_name(i, &lguest_irq_controller,
                                                      handle_level_irq,
                                                      "level");
@@@ -737,7 -738,7 +738,7 @@@ static void lguest_time_init(void
  
        /* We can't set cpumask in the initializer: damn C limitations!  Set it
         * here and register our timer device. */
 -      lguest_clockevent.cpumask = cpumask_of_cpu(0);
 +      lguest_clockevent.cpumask = cpumask_of(0);
        clockevents_register_device(&lguest_clockevent);
  
        /* Finally, we unblock the timer interrupt. */
diff --combined include/linux/smp.h
@@@ -21,9 -21,6 +21,9 @@@ struct call_single_data 
        u16 priv;
  };
  
 +/* total number of cpus in this system (may exceed NR_CPUS) */
 +extern unsigned int total_cpus;
 +
  #ifdef CONFIG_SMP
  
  #include <linux/preempt.h>
@@@ -149,6 -146,8 +149,8 @@@ static inline void smp_send_reschedule(
  })
  #define smp_call_function_mask(mask, func, info, wait) \
                        (up_smp_call_function(func, info))
+ #define smp_call_function_many(mask, func, info, wait) \
+                       (up_smp_call_function(func, info))
  static inline void init_call_single_data(void)
  {
  }
diff --combined init/Kconfig
@@@ -588,6 -588,13 +588,13 @@@ config KALLSYMS_AL
  
           Say N.
  
+ config KALLSYMS_STRIP_GENERATED
+       bool "Strip machine generated symbols from kallsyms"
+       depends on KALLSYMS_ALL
+       default y
+       help
+         Say N if you want kallsyms to retain even machine generated symbols.
  config KALLSYMS_EXTRA_PASS
        bool "Do an extra kallsyms pass"
        depends on KALLSYMS
@@@ -808,6 -815,7 +815,7 @@@ config TRACEPOINT
  
  config MARKERS
        bool "Activate markers"
+       depends on TRACEPOINTS
        help
          Place an empty function call at each marker site. Can be
          dynamically changed for a probe function.
@@@ -916,15 -924,6 +924,15 @@@ config KMO
  
  endif # MODULES
  
 +config INIT_ALL_POSSIBLE
 +      bool
 +      help
 +        Back when each arch used to define their own cpu_online_map and
 +        cpu_possible_map, some of them chose to initialize cpu_possible_map
 +        with all 1s, and others with all 0s.  When they were centralised,
 +        it was better to provide this option than to break all the archs
 +        and have several arch maintainers pursuing me down dark alleys.
 +
  config STOP_MACHINE
        bool
        default y
diff --combined kernel/profile.c
@@@ -442,7 -442,7 +442,7 @@@ void profile_tick(int type
  static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
  {
 -      int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
 +      int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
        if (count - len < 2)
                return -EINVAL;
        len += sprintf(page + len, "\n");
@@@ -456,7 -456,7 +456,7 @@@ static int prof_cpu_mask_write_proc(str
        unsigned long full_count = count, err;
        cpumask_t new_value;
  
 -      err = cpumask_parse_user(buffer, count, new_value);
 +      err = cpumask_parse_user(buffer, count, &new_value);
        if (err)
                return err;
  
@@@ -544,7 -544,7 +544,7 @@@ static const struct file_operations pro
  };
  
  #ifdef CONFIG_SMP
- static inline void profile_nop(void *unused)
+ static void profile_nop(void *unused)
  {
  }
  
diff --combined kernel/sched.c
   */
  #define RUNTIME_INF   ((u64)~0ULL)
  
+ DEFINE_TRACE(sched_wait_task);
+ DEFINE_TRACE(sched_wakeup);
+ DEFINE_TRACE(sched_wakeup_new);
+ DEFINE_TRACE(sched_switch);
+ DEFINE_TRACE(sched_migrate_task);
  #ifdef CONFIG_SMP
  /*
   * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@@ -261,6 -267,10 +267,10 @@@ struct task_group 
        struct cgroup_subsys_state css;
  #endif
  
+ #ifdef CONFIG_USER_SCHED
+       uid_t uid;
+ #endif
  #ifdef CONFIG_FAIR_GROUP_SCHED
        /* schedulable entities of this group on each cpu */
        struct sched_entity **se;
  
  #ifdef CONFIG_USER_SCHED
  
+ /* Helper function to pass uid information to create_sched_user() */
+ void set_tg_uid(struct user_struct *user)
+ {
+       user->tg->uid = user->uid;
+       user->tg->uid = user->uid;
+ }
  /*
   * Root task group.
   *    Every UID task group (including init_task_group aka UID-0) will
@@@ -345,7 -361,9 +361,9 @@@ static inline struct task_group *task_g
        struct task_group *tg;
  
  #ifdef CONFIG_USER_SCHED
-       tg = p->user->tg;
+       rcu_read_lock();
+       tg = __task_cred(p)->user->tg;
+       rcu_read_unlock();
  #elif defined(CONFIG_CGROUP_SCHED)
        tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                struct task_group, css);
@@@ -586,6 -604,8 +604,8 @@@ struct rq 
  #ifdef CONFIG_SCHEDSTATS
        /* latency stats */
        struct sched_info rq_sched_info;
+       unsigned long long rq_cpu_time;
+       /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
  
        /* sys_sched_yield() stats */
        unsigned int yld_exp_empty;
@@@ -703,45 -723,18 +723,18 @@@ static __read_mostly char *sched_feat_n
  
  #undef SCHED_FEAT
  
- static int sched_feat_open(struct inode *inode, struct file *filp)
- {
-       filp->private_data = inode->i_private;
-       return 0;
- }
- static ssize_t
- sched_feat_read(struct file *filp, char __user *ubuf,
-               size_t cnt, loff_t *ppos)
+ static int sched_feat_show(struct seq_file *m, void *v)
  {
        int i;
  
        for (i = 0; sched_feat_names[i]; i++) {
-               len += strlen(sched_feat_names[i]);
-               len += 4;
-       }
-       buf = kmalloc(len + 2, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-       for (i = 0; sched_feat_names[i]; i++) {
-               if (sysctl_sched_features & (1UL << i))
-                       r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-               else
-                       r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+               if (!(sysctl_sched_features & (1UL << i)))
+                       seq_puts(m, "NO_");
+               seq_printf(m, "%s ", sched_feat_names[i]);
        }
+       seq_puts(m, "\n");
  
-       r += sprintf(buf + r, "\n");
-       WARN_ON(r >= len + 2);
-       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-       kfree(buf);
-       return r;
+       return 0;
  }
  
  static ssize_t
@@@ -786,10 -779,17 +779,17 @@@ sched_feat_write(struct file *filp, con
        return cnt;
  }
  
+ static int sched_feat_open(struct inode *inode, struct file *filp)
+ {
+       return single_open(filp, sched_feat_show, NULL);
+ }
  static struct file_operations sched_feat_fops = {
-       .open   = sched_feat_open,
-       .read   = sched_feat_read,
-       .write  = sched_feat_write,
+       .open           = sched_feat_open,
+       .write          = sched_feat_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
  };
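 
  sched_feat_fops moves from a hand-rolled read handler (kmalloc a buffer,
  sprintf into it, simple_read_from_buffer) to the seq_file single_open()
  helpers added above. The same boilerplate pattern, shown generically
  (my_* names are hypothetical; needs <linux/seq_file.h> and <linux/fs.h>):
 
        static int my_show(struct seq_file *m, void *v)
        {
                seq_puts(m, "hello\n");
                return 0;
        }
 
        static int my_open(struct inode *inode, struct file *filp)
        {
                return single_open(filp, my_show, inode->i_private);
        }
 
        static const struct file_operations my_fops = {
                .open    = my_open,
                .read    = seq_read,
                .llseek  = seq_lseek,
                .release = single_release,
        };
 
  seq_file handles buffering and partial reads, which is exactly what the
  removed sched_feat_read() was reimplementing by hand.
 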
  
  static __init int sched_init_debug(void)
@@@ -1474,27 -1474,13 +1474,13 @@@ static voi
  update_group_shares_cpu(struct task_group *tg, int cpu,
                        unsigned long sd_shares, unsigned long sd_rq_weight)
  {
-       int boost = 0;
        unsigned long shares;
        unsigned long rq_weight;
  
        if (!tg->se[cpu])
                return;
  
-       rq_weight = tg->cfs_rq[cpu]->load.weight;
-       /*
-        * If there are currently no tasks on the cpu pretend there is one of
-        * average load so that when a new task gets to run here it will not
-        * get delayed by group starvation.
-        */
-       if (!rq_weight) {
-               boost = 1;
-               rq_weight = NICE_0_LOAD;
-       }
-       if (unlikely(rq_weight > sd_rq_weight))
-               rq_weight = sd_rq_weight;
+       rq_weight = tg->cfs_rq[cpu]->rq_weight;
  
        /*
         *           \Sum shares * rq_weight
         *               \Sum rq_weight
         *
         */
-       shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+       shares = (sd_shares * rq_weight) / sd_rq_weight;
        shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
  
        if (abs(shares - tg->se[cpu]->load.weight) >
                unsigned long flags;
  
                spin_lock_irqsave(&rq->lock, flags);
-               /*
-                * record the actual number of shares, not the boosted amount.
-                */
-               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-               tg->cfs_rq[cpu]->rq_weight = rq_weight;
+               tg->cfs_rq[cpu]->shares = shares;
  
                __set_se_shares(tg->se[cpu], shares);
                spin_unlock_irqrestore(&rq->lock, flags);
   */
  static int tg_shares_up(struct task_group *tg, void *data)
  {
-       unsigned long rq_weight = 0;
+       unsigned long weight, rq_weight = 0;
        unsigned long shares = 0;
        struct sched_domain *sd = data;
        int i;
  
        for_each_cpu_mask(i, sd->span) {
-               rq_weight += tg->cfs_rq[i]->load.weight;
+               /*
+                * If there are currently no tasks on the cpu pretend there
+                * is one of average load so that when a new task gets to
+                * run here it will not get delayed by group starvation.
+                */
+               weight = tg->cfs_rq[i]->load.weight;
+               if (!weight)
+                       weight = NICE_0_LOAD;
+               tg->cfs_rq[i]->rq_weight = weight;
+               rq_weight += weight;
                shares += tg->cfs_rq[i]->shares;
        }
  
        if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                shares = tg->shares;
  
-       if (!rq_weight)
-               rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
        for_each_cpu_mask(i, sd->span)
                update_group_shares_cpu(tg, i, shares, rq_weight);
  
@@@ -1612,6 -1601,39 +1601,39 @@@ static inline void update_shares_locked
  
  #endif
  
+ /*
+  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+  */
+ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(this_rq->lock)
+       __acquires(busiest->lock)
+       __acquires(this_rq->lock)
+ {
+       int ret = 0;
+       if (unlikely(!irqs_disabled())) {
+               /* printk() doesn't work good under rq->lock */
+               spin_unlock(&this_rq->lock);
+               BUG_ON(1);
+       }
+       if (unlikely(!spin_trylock(&busiest->lock))) {
+               if (busiest < this_rq) {
+                       spin_unlock(&this_rq->lock);
+                       spin_lock(&busiest->lock);
+                       spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+                       ret = 1;
+               } else
+                       spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+       }
+       return ret;
+ }
+ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(busiest->lock)
+ {
+       spin_unlock(&busiest->lock);
+       lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+ }
  #endif
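 
  double_lock_balance()/double_unlock_balance() are moved earlier in sched.c
  (the old copies are deleted in a later hunk). The rule they implement is the
  usual ABBA avoidance: if the opportunistic trylock fails, fall back to taking
  the lower-addressed runqueue lock first. A sketch of that ordering rule in
  isolation (example_lock_two is a hypothetical function):
 
        static void example_lock_two(spinlock_t *a, spinlock_t *b)
        {
                /* A fixed order (ascending address) prevents ABBA deadlock. */
                if (a < b) {
                        spin_lock(a);
                        spin_lock_nested(b, SINGLE_DEPTH_NESTING);
                } else {
                        spin_lock(b);
                        spin_lock_nested(a, SINGLE_DEPTH_NESTING);
                }
        }
 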
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
@@@ -1845,6 -1867,8 +1867,8 @@@ void set_task_cpu(struct task_struct *p
  
        clock_offset = old_rq->clock - new_rq->clock;
  
+       trace_sched_migrate_task(p, task_cpu(p), new_cpu);
  #ifdef CONFIG_SCHEDSTATS
        if (p->se.wait_start)
                p->se.wait_start -= clock_offset;
@@@ -2254,6 -2278,7 +2278,7 @@@ static int try_to_wake_up(struct task_s
  
        smp_wmb();
        rq = task_rq_lock(p, &flags);
+       update_rq_clock(rq);
        old_state = p->state;
        if (!(old_state & state))
                goto out;
@@@ -2311,12 -2336,11 +2336,11 @@@ out_activate
                schedstat_inc(p, se.nr_wakeups_local);
        else
                schedstat_inc(p, se.nr_wakeups_remote);
-       update_rq_clock(rq);
        activate_task(rq, p, 1);
        success = 1;
  
  out_running:
-       trace_sched_wakeup(rq, p);
+       trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, sync);
  
        p->state = TASK_RUNNING;
@@@ -2449,7 -2473,7 +2473,7 @@@ void wake_up_new_task(struct task_struc
                p->sched_class->task_new(rq, p);
                inc_nr_running(rq);
        }
-       trace_sched_wakeup_new(rq, p);
+       trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, 0);
  #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
@@@ -2812,40 -2836,6 +2836,6 @@@ static void double_rq_unlock(struct rq 
  }
  
  /*
-  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
-  */
- static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-       __releases(this_rq->lock)
-       __acquires(busiest->lock)
-       __acquires(this_rq->lock)
- {
-       int ret = 0;
-       if (unlikely(!irqs_disabled())) {
-               /* printk() doesn't work good under rq->lock */
-               spin_unlock(&this_rq->lock);
-               BUG_ON(1);
-       }
-       if (unlikely(!spin_trylock(&busiest->lock))) {
-               if (busiest < this_rq) {
-                       spin_unlock(&this_rq->lock);
-                       spin_lock(&busiest->lock);
-                       spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-                       ret = 1;
-               } else
-                       spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
-       }
-       return ret;
- }
- static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-       __releases(busiest->lock)
- {
-       spin_unlock(&busiest->lock);
-       lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
- }
- /*
   * If dest_cpu is allowed for this process, migrate the task to it.
   * This is accomplished by forcing the cpu_allowed mask to only
   * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@@ -2862,7 -2852,6 +2852,6 @@@ static void sched_migrate_task(struct t
            || unlikely(!cpu_active(dest_cpu)))
                goto out;
  
-       trace_sched_migrate_task(rq, p, dest_cpu);
        /* force the process onto the specified CPU */
        if (migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
@@@ -3707,7 -3696,7 +3696,7 @@@ out_balanced
  static void idle_balance(int this_cpu, struct rq *this_rq)
  {
        struct sched_domain *sd;
-       int pulled_task = -1;
+       int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
        cpumask_t tmpmask;
  
@@@ -5134,6 -5123,22 +5123,22 @@@ __setscheduler(struct rq *rq, struct ta
        set_load_weight(p);
  }
  
+ /*
+  * check the target process has a UID that matches the current process's
+  */
+ static bool check_same_owner(struct task_struct *p)
+ {
+       const struct cred *cred = current_cred(), *pcred;
+       bool match;
+       rcu_read_lock();
+       pcred = __task_cred(p);
+       match = (cred->euid == pcred->euid ||
+                cred->euid == pcred->uid);
+       rcu_read_unlock();
+       return match;
+ }
  static int __sched_setscheduler(struct task_struct *p, int policy,
                                struct sched_param *param, bool user)
  {
@@@ -5193,8 -5198,7 +5198,7 @@@ recheck
                        return -EPERM;
  
                /* can't change other user's priorities */
-               if ((current->euid != p->euid) &&
-                   (current->euid != p->uid))
+               if (!check_same_owner(p))
                        return -EPERM;
        }
  
@@@ -5426,8 -5430,7 +5430,7 @@@ long sched_setaffinity(pid_t pid, cons
        read_unlock(&tasklist_lock);
  
        retval = -EPERM;
-       if ((current->euid != p->euid) && (current->euid != p->uid) &&
-                       !capable(CAP_SYS_NICE))
+       if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
                goto out_unlock;
  
        retval = security_task_setscheduler(p, 0, NULL);
@@@ -5896,6 -5899,7 +5899,7 @@@ void __cpuinit init_idle(struct task_st
         * The idle tasks have their own, simple scheduling class:
         */
        idle->sched_class = &idle_sched_class;
+       ftrace_graph_init_task(idle);
  }
  
  /*
@@@ -6126,7 -6130,6 +6130,6 @@@ static int __migrate_task_irq(struct ta
  
  /*
   * Figure out where task on dead CPU should go, use force if necessary.
-  * NOTE: interrupts should be disabled by the caller
   */
  static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
  {
@@@ -6638,35 -6641,13 +6641,13 @@@ early_initcall(migration_init)
  
  #ifdef CONFIG_SCHED_DEBUG
  
- static inline const char *sd_level_to_string(enum sched_domain_level lvl)
- {
-       switch (lvl) {
-       case SD_LV_NONE:
-                       return "NONE";
-       case SD_LV_SIBLING:
-                       return "SIBLING";
-       case SD_LV_MC:
-                       return "MC";
-       case SD_LV_CPU:
-                       return "CPU";
-       case SD_LV_NODE:
-                       return "NODE";
-       case SD_LV_ALLNODES:
-                       return "ALLNODES";
-       case SD_LV_MAX:
-                       return "MAX";
-       }
-       return "MAX";
- }
  static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                  cpumask_t *groupmask)
  {
        struct sched_group *group = sd->groups;
        char str[256];
  
 -      cpulist_scnprintf(str, sizeof(str), sd->span);
 +      cpulist_scnprintf(str, sizeof(str), &sd->span);
        cpus_clear(*groupmask);
  
        printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
                return -1;
        }
  
-       printk(KERN_CONT "span %s level %s\n",
-               str, sd_level_to_string(sd->level));
+       printk(KERN_CONT "span %s level %s\n", str, sd->name);
  
        if (!cpu_isset(cpu, sd->span)) {
                printk(KERN_ERR "ERROR: domain->span does not contain "
  
                cpus_or(*groupmask, *groupmask, group->cpumask);
  
 -              cpulist_scnprintf(str, sizeof(str), group->cpumask);
 +              cpulist_scnprintf(str, sizeof(str), &group->cpumask);
                printk(KERN_CONT " %s", str);
  
                group = group->next;
@@@ -6816,6 -6796,8 +6796,8 @@@ sd_parent_degenerate(struct sched_domai
                                SD_BALANCE_EXEC |
                                SD_SHARE_CPUPOWER |
                                SD_SHARE_PKG_RESOURCES);
+               if (nr_node_ids == 1)
+                       pflags &= ~SD_SERIALIZE;
        }
        if (~cflags & pflags)
                return 0;
@@@ -7119,7 -7101,7 +7101,7 @@@ cpu_to_phys_group(int cpu, const cpumas
  {
        int group;
  #ifdef CONFIG_SCHED_MC
 -      *mask = cpu_coregroup_map(cpu);
 +      *mask = *cpu_coregroup_mask(cpu);
        cpus_and(*mask, *mask, *cpu_map);
        group = first_cpu(*mask);
  #elif defined(CONFIG_SCHED_SMT)
@@@ -7336,13 -7318,21 +7318,21 @@@ struct allmasks 
  };
  
  #if   NR_CPUS > 128
- #define       SCHED_CPUMASK_ALLOC             1
- #define       SCHED_CPUMASK_FREE(v)           kfree(v)
- #define       SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+ #define SCHED_CPUMASK_DECLARE(v)      struct allmasks *v
+ static inline void sched_cpumask_alloc(struct allmasks **masks)
+ {
+       *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+ }
+ static inline void sched_cpumask_free(struct allmasks *masks)
+ {
+       kfree(masks);
+ }
  #else
- #define       SCHED_CPUMASK_ALLOC             0
- #define       SCHED_CPUMASK_FREE(v)
- #define       SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+ #define SCHED_CPUMASK_DECLARE(v)      struct allmasks _v, *v = &_v
+ static inline void sched_cpumask_alloc(struct allmasks **masks)
+ { }
+ static inline void sched_cpumask_free(struct allmasks *masks)
+ { }
  #endif
  
  #define       SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
@@@ -7418,9 -7408,8 +7408,8 @@@ static int __build_sched_domains(const 
                return -ENOMEM;
        }
  
- #if SCHED_CPUMASK_ALLOC
        /* get space for all scratch cpumask variables */
-       allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+       sched_cpumask_alloc(&allmasks);
        if (!allmasks) {
                printk(KERN_WARNING "Cannot alloc cpumask array\n");
                kfree(rd);
  #endif
                return -ENOMEM;
        }
- #endif
        tmpmask = (cpumask_t *)allmasks;
  
  
                sd = &per_cpu(core_domains, i);
                SD_INIT(sd, MC);
                set_domain_attribute(sd, attr);
 -              sd->span = cpu_coregroup_map(i);
 +              sd->span = *cpu_coregroup_mask(i);
                cpus_and(sd->span, sd->span, *cpu_map);
                sd->parent = p;
                p->child = sd;
                SCHED_CPUMASK_VAR(this_core_map, allmasks);
                SCHED_CPUMASK_VAR(send_covered, allmasks);
  
 -              *this_core_map = cpu_coregroup_map(i);
 +              *this_core_map = *cpu_coregroup_mask(i);
                cpus_and(*this_core_map, *this_core_map, *cpu_map);
                if (i != first_cpu(*this_core_map))
                        continue;
                cpu_attach_domain(sd, rd, i);
        }
  
-       SCHED_CPUMASK_FREE((void *)allmasks);
+       sched_cpumask_free(allmasks);
        return 0;
  
  #ifdef CONFIG_NUMA
  error:
        free_sched_groups(cpu_map, tmpmask);
-       SCHED_CPUMASK_FREE((void *)allmasks);
+       sched_cpumask_free(allmasks);
        kfree(rd);
        return -ENOMEM;
  #endif
@@@ -7712,8 -7701,14 +7701,14 @@@ static struct sched_domain_attr *dattr_
   */
  static cpumask_t fallback_doms;
  
- void __attribute__((weak)) arch_update_cpu_topology(void)
+ /*
+  * arch_update_cpu_topology lets virtualized architectures update the
+  * cpu core maps. It is supposed to return 1 if the topology changed
+  * or 0 if it stayed the same.
+  */
+ int __attribute__((weak)) arch_update_cpu_topology(void)
  {
+       return 0;
  }
  
  /*
@@@ -7753,8 -7748,6 +7748,6 @@@ static void detach_destroy_domains(cons
        cpumask_t tmpmask;
        int i;
  
-       unregister_sched_domain_sysctl();
        for_each_cpu_mask_nr(i, *cpu_map)
                cpu_attach_domain(NULL, &def_root_domain, i);
        synchronize_sched();
@@@ -7807,17 -7800,21 +7800,21 @@@ void partition_sched_domains(int ndoms_
                             struct sched_domain_attr *dattr_new)
  {
        int i, j, n;
+       int new_topology;
  
        mutex_lock(&sched_domains_mutex);
  
        /* always unregister in case we don't destroy any domains */
        unregister_sched_domain_sysctl();
  
+       /* Let architecture update cpu core mappings. */
+       new_topology = arch_update_cpu_topology();
        n = doms_new ? ndoms_new : 0;
  
        /* Destroy deleted domains */
        for (i = 0; i < ndoms_cur; i++) {
-               for (j = 0; j < n; j++) {
+               for (j = 0; j < n && !new_topology; j++) {
                        if (cpus_equal(doms_cur[i], doms_new[j])
                            && dattrs_equal(dattr_cur, i, dattr_new, j))
                                goto match1;
@@@ -7832,12 -7829,12 +7829,12 @@@ match1
                ndoms_cur = 0;
                doms_new = &fallback_doms;
                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-               dattr_new = NULL;
+               WARN_ON_ONCE(dattr_new);
        }
  
        /* Build new domains */
        for (i = 0; i < ndoms_new; i++) {
-               for (j = 0; j < ndoms_cur; j++) {
+               for (j = 0; j < ndoms_cur && !new_topology; j++) {
                        if (cpus_equal(doms_new[i], doms_cur[j])
                            && dattrs_equal(dattr_new, i, dattr_cur, j))
                                goto match2;
@@@ -8492,7 -8489,7 +8489,7 @@@ stati
  int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  {
        struct cfs_rq *cfs_rq;
-       struct sched_entity *se, *parent_se;
+       struct sched_entity *se;
        struct rq *rq;
        int i;
  
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
  
-               cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+                                     GFP_KERNEL, cpu_to_node(i));
                if (!cfs_rq)
                        goto err;
  
-               se = kmalloc_node(sizeof(struct sched_entity),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               se = kzalloc_node(sizeof(struct sched_entity),
+                                 GFP_KERNEL, cpu_to_node(i));
                if (!se)
                        goto err;
  
-               parent_se = parent ? parent->se[i] : NULL;
-               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
        }
  
        return 1;
@@@ -8580,7 -8576,7 +8576,7 @@@ stati
  int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  {
        struct rt_rq *rt_rq;
-       struct sched_rt_entity *rt_se, *parent_se;
+       struct sched_rt_entity *rt_se;
        struct rq *rq;
        int i;
  
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
  
-               rt_rq = kmalloc_node(sizeof(struct rt_rq),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               rt_rq = kzalloc_node(sizeof(struct rt_rq),
+                                    GFP_KERNEL, cpu_to_node(i));
                if (!rt_rq)
                        goto err;
  
-               rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-                               GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+               rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+                                    GFP_KERNEL, cpu_to_node(i));
                if (!rt_se)
                        goto err;
  
-               parent_se = parent ? parent->rt_se[i] : NULL;
-               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
        }
  
        return 1;
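 
  alloc_fair_sched_group() and alloc_rt_sched_group() switch from
  kmalloc_node(..., GFP_KERNEL|__GFP_ZERO, node) to kzalloc_node(..., GFP_KERNEL,
  node). The equivalence being relied on, sketched as it is commonly defined
  (an assumption, not quoted from <linux/slab.h>):
 
        /* Zeroing, node-local allocation: kzalloc_node is essentially this. */
        static inline void *example_kzalloc_node(size_t size, gfp_t flags, int node)
        {
                return kmalloc_node(size, flags | __GFP_ZERO, node);
        }
 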
@@@ -9251,11 -9246,12 +9246,12 @@@ struct cgroup_subsys cpu_cgroup_subsys 
   * (balbir@in.ibm.com).
   */
  
- /* track cpu usage of a group of tasks */
+ /* track cpu usage of a group of tasks and its child groups */
  struct cpuacct {
        struct cgroup_subsys_state css;
        /* cpuusage holds pointer to a u64-type object on every cpu */
        u64 *cpuusage;
+       struct cpuacct *parent;
  };
  
  struct cgroup_subsys cpuacct_subsys;
@@@ -9289,6 -9285,9 +9285,9 @@@ static struct cgroup_subsys_state *cpua
                return ERR_PTR(-ENOMEM);
        }
  
+       if (cgrp->parent)
+               ca->parent = cgroup_ca(cgrp->parent);
        return &ca->css;
  }
  
@@@ -9302,6 -9301,41 +9301,41 @@@ cpuacct_destroy(struct cgroup_subsys *s
        kfree(ca);
  }
  
+ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+ {
+       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+       u64 data;
+ #ifndef CONFIG_64BIT
+       /*
+        * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+        */
+       spin_lock_irq(&cpu_rq(cpu)->lock);
+       data = *cpuusage;
+       spin_unlock_irq(&cpu_rq(cpu)->lock);
+ #else
+       data = *cpuusage;
+ #endif
+       return data;
+ }
+ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+ {
+       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ #ifndef CONFIG_64BIT
+       /*
+        * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+        */
+       spin_lock_irq(&cpu_rq(cpu)->lock);
+       *cpuusage = val;
+       spin_unlock_irq(&cpu_rq(cpu)->lock);
+ #else
+       *cpuusage = val;
+ #endif
+ }
  /* return total cpu usage (in nanoseconds) of a group */
  static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
  {
        u64 totalcpuusage = 0;
        int i;
  
-       for_each_possible_cpu(i) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
-               /*
-                * Take rq->lock to make 64-bit addition safe on 32-bit
-                * platforms.
-                */
-               spin_lock_irq(&cpu_rq(i)->lock);
-               totalcpuusage += *cpuusage;
-               spin_unlock_irq(&cpu_rq(i)->lock);
-       }
+       for_each_present_cpu(i)
+               totalcpuusage += cpuacct_cpuusage_read(ca, i);
  
        return totalcpuusage;
  }
@@@ -9336,23 -9361,39 +9361,39 @@@ static int cpuusage_write(struct cgrou
                goto out;
        }
  
-       for_each_possible_cpu(i) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+       for_each_present_cpu(i)
+               cpuacct_cpuusage_write(ca, i, 0);
  
-               spin_lock_irq(&cpu_rq(i)->lock);
-               *cpuusage = 0;
-               spin_unlock_irq(&cpu_rq(i)->lock);
-       }
  out:
        return err;
  }
  
+ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+                                  struct seq_file *m)
+ {
+       struct cpuacct *ca = cgroup_ca(cgroup);
+       u64 percpu;
+       int i;
+       for_each_present_cpu(i) {
+               percpu = cpuacct_cpuusage_read(ca, i);
+               seq_printf(m, "%llu ", (unsigned long long) percpu);
+       }
+       seq_printf(m, "\n");
+       return 0;
+ }
  static struct cftype files[] = {
        {
                .name = "usage",
                .read_u64 = cpuusage_read,
                .write_u64 = cpuusage_write,
        },
+       {
+               .name = "usage_percpu",
+               .read_seq_string = cpuacct_percpu_seq_read,
+       },
  };
  
  static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
  static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  {
        struct cpuacct *ca;
+       int cpu;
  
        if (!cpuacct_subsys.active)
                return;
  
+       cpu = task_cpu(tsk);
        ca = task_ca(tsk);
-       if (ca) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
  
+       for (; ca; ca = ca->parent) {
+               u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
                *cpuusage += cputime;
        }
  }
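
With the new parent pointer, cpuacct_charge() walks up the group hierarchy so a child's usage is also accounted to every ancestor. A standalone sketch of that walk (plain structs, no cgroup code):

#include <stdint.h>
#include <stdio.h>

/* Plain-struct sketch of the hierarchical charge: the amount is added
 * to the task's group and to every ancestor, mirroring the
 * "for (; ca; ca = ca->parent)" loop above.  Not cgroup code. */
struct group {
        const char *name;
        uint64_t usage;
        struct group *parent;
};

static void charge(struct group *g, uint64_t cputime)
{
        for (; g; g = g->parent)
                g->usage += cputime;
}

int main(void)
{
        struct group root  = { "root",  0, NULL };
        struct group child = { "child", 0, &root };

        charge(&child, 1000);   /* accounted to child and to root */
        printf("%s=%llu %s=%llu\n",
               root.name,  (unsigned long long)root.usage,
               child.name, (unsigned long long)child.usage);
        return 0;
}
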
diff --combined kernel/sched_stats.h
@@@ -31,7 -31,7 +31,7 @@@ static int show_schedstat(struct seq_fi
                    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
                    rq->sched_switch, rq->sched_count, rq->sched_goidle,
                    rq->ttwu_count, rq->ttwu_local,
-                   rq->rq_sched_info.cpu_time,
+                   rq->rq_cpu_time,
                    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
  
                seq_printf(seq, "\n");
@@@ -42,7 -42,7 +42,7 @@@
                for_each_domain(cpu, sd) {
                        enum cpu_idle_type itype;
  
 -                      cpumask_scnprintf(mask_str, mask_len, sd->span);
 +                      cpumask_scnprintf(mask_str, mask_len, &sd->span);
                        seq_printf(seq, "domain%d %s", dcount++, mask_str);
                        for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
                                        itype++) {
@@@ -123,7 -123,7 +123,7 @@@ static inline voi
  rq_sched_info_depart(struct rq *rq, unsigned long long delta)
  {
        if (rq)
-               rq->rq_sched_info.cpu_time += delta;
+               rq->rq_cpu_time += delta;
  }
  
  static inline void
@@@ -236,7 -236,6 +236,6 @@@ static inline void sched_info_depart(st
        unsigned long long delta = task_rq(t)->clock -
                                        t->sched_info.last_arrival;
  
-       t->sched_info.cpu_time += delta;
        rq_sched_info_depart(task_rq(t), delta);
  
        if (t->state == TASK_RUNNING)
diff --combined kernel/trace/trace.c
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/gfp.h>
  #include <linux/fs.h>
  #include <linux/kprobes.h>
+ #include <linux/seq_file.h>
  #include <linux/writeback.h>
  
  #include <linux/stacktrace.h>
  unsigned long __read_mostly   tracing_max_latency = (cycle_t)ULONG_MAX;
  unsigned long __read_mostly   tracing_thresh;
  
+ /*
+  * We need to change this state when a selftest is running.
+  * A selftest will look into the ring buffer to count the
+  * entries it inserted, but concurrent insertions into the
+  * ring buffer (such as from ftrace_printk) could occur at the
+  * same time, giving false positive or negative results.
+  */
+ static bool __read_mostly tracing_selftest_running;
+ /* For tracers that don't implement custom flags */
+ static struct tracer_opt dummy_tracer_opt[] = {
+       { }
+ };
+ static struct tracer_flags dummy_tracer_flags = {
+       .val = 0,
+       .opts = dummy_tracer_opt
+ };
+ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+ {
+       return 0;
+ }
+ /*
+  * Kill all tracing for good (never come back).
+  * It is initialized to 1 and turns to zero only when the tracer
+  * core initializes successfully; nothing else ever clears it.
+  */
+ int tracing_disabled = 1;
  static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
  
  static inline void ftrace_disable_cpu(void)
@@@ -62,7 -95,36 +95,36 @@@ static cpumask_t __read_mostly              tracing
  #define for_each_tracing_cpu(cpu)     \
        for_each_cpu_mask(cpu, tracing_buffer_mask)
  
- static int tracing_disabled = 1;
+ /*
+  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+  *
+  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
+  * is set, then ftrace_dump is called. This will output the contents
+  * of the ftrace buffers to the console.  This is very useful for
+  * capturing traces that lead to crashes and outputting them to a
+  * serial console.
+  *
+  * It is off by default; you can enable it either by specifying
+  * "ftrace_dump_on_oops" on the kernel command line or by setting
+  * /proc/sys/kernel/ftrace_dump_on_oops to true.
+  */
+ int ftrace_dump_on_oops;
+ static int tracing_set_tracer(char *buf);
+ static int __init set_ftrace(char *str)
+ {
+       tracing_set_tracer(str);
+       return 1;
+ }
+ __setup("ftrace", set_ftrace);
+ static int __init set_ftrace_dump_on_oops(char *str)
+ {
+       ftrace_dump_on_oops = 1;
+       return 1;
+ }
+ __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
  
  long
  ns2usecs(cycle_t nsec)
@@@ -112,6 -174,19 +174,19 @@@ static DEFINE_PER_CPU(struct trace_arra
  /* tracer_enabled is used to toggle activation of a tracer */
  static int                    tracer_enabled = 1;
  
+ /**
+  * tracing_is_enabled - return tracer_enabled status
+  *
+  * This function is used by other tracers to know the status
+  * of the tracer_enabled flag.  Tracers may use this function
+  * to decide whether to enable their features when starting
+  * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
+  */
+ int tracing_is_enabled(void)
+ {
+       return tracer_enabled;
+ }
  /* function tracing enabled */
  int                           ftrace_function_enabled;
  
@@@ -153,8 -228,9 +228,9 @@@ static DEFINE_MUTEX(trace_types_lock)
  /* trace_wait is a waitqueue for tasks blocked on trace_poll */
  static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
  
- /* trace_flags holds iter_ctrl options */
- unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+ /* trace_flags holds trace_options default values */
+ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
+       TRACE_ITER_ANNOTATE;
  
  /**
   * trace_wake_up - wake up tasks waiting for trace input
@@@ -193,13 -269,6 +269,6 @@@ unsigned long nsecs_to_usecs(unsigned l
        return nsecs / 1000;
  }
  
- /*
-  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
-  * control the output of kernel symbols.
-  */
- #define TRACE_ITER_SYM_MASK \
-       (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
  /* These must match the bit positions in trace_iterator_flags */
  static const char *trace_options[] = {
        "print-parent",
        "stacktrace",
        "sched-tree",
        "ftrace_printk",
+       "ftrace_preempt",
+       "branch",
+       "annotate",
+       "userstacktrace",
+       "sym-userobj",
+       "printk-msg-only",
        NULL
  };
  
@@@ -246,7 -321,7 +321,7 @@@ __update_max_tr(struct trace_array *tr
  
        memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
        data->pid = tsk->pid;
-       data->uid = tsk->uid;
+       data->uid = task_uid(tsk);
        data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
        data->policy = tsk->policy;
        data->rt_priority = tsk->rt_priority;
@@@ -359,6 -434,28 +434,28 @@@ trace_seq_putmem_hex(struct trace_seq *
        return trace_seq_putmem(s, hex, j);
  }
  
+ static int
+ trace_seq_path(struct trace_seq *s, struct path *path)
+ {
+       unsigned char *p;
+       if (s->len >= (PAGE_SIZE - 1))
+               return 0;
+       p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+       if (!IS_ERR(p)) {
+               p = mangle_path(s->buffer + s->len, p, "\n");
+               if (p) {
+                       s->len = p - s->buffer;
+                       return 1;
+               }
+       } else {
+               s->buffer[s->len++] = '?';
+               return 1;
+       }
+       return 0;
+ }
  static void
  trace_seq_reset(struct trace_seq *s)
  {
@@@ -470,7 -567,17 +567,17 @@@ int register_tracer(struct tracer *type
                return -1;
        }
  
+       /*
+        * When this gets called we hold the BKL which means that
+        * preemption is disabled. Various trace selftests however
+        * need to disable and enable preemption for successful tests.
+        * So we drop the BKL here and grab it again after the tests.
+        */
+       unlock_kernel();
        mutex_lock(&trace_types_lock);
+       tracing_selftest_running = true;
        for (t = trace_types; t; t = t->next) {
                if (strcmp(type->name, t->name) == 0) {
                        /* already found */
                }
        }
  
+       if (!type->set_flag)
+               type->set_flag = &dummy_set_flag;
+       if (!type->flags)
+               type->flags = &dummy_tracer_flags;
+       else
+               if (!type->flags->opts)
+                       type->flags->opts = dummy_tracer_opt;
  #ifdef CONFIG_FTRACE_STARTUP_TEST
        if (type->selftest) {
                struct tracer *saved_tracer = current_trace;
                struct trace_array *tr = &global_trace;
-               int saved_ctrl = tr->ctrl;
                int i;
                /*
                 * Run a selftest on this tracer.
                 * Here we reset the trace buffer, and set the current
                 * internal tracing to verify that everything is in order.
                 * If we fail, we do not register this tracer.
                 */
-               for_each_tracing_cpu(i) {
+               for_each_tracing_cpu(i)
                        tracing_reset(tr, i);
-               }
                current_trace = type;
-               tr->ctrl = 0;
                /* the test is responsible for initializing and enabling */
                pr_info("Testing tracer %s: ", type->name);
                ret = type->selftest(type, tr);
                /* the test is responsible for resetting too */
                current_trace = saved_tracer;
-               tr->ctrl = saved_ctrl;
                if (ret) {
                        printk(KERN_CONT "FAILED!\n");
                        goto out;
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
-               for_each_tracing_cpu(i) {
+               for_each_tracing_cpu(i)
                        tracing_reset(tr, i);
-               }
                printk(KERN_CONT "PASSED\n");
        }
  #endif
                max_tracer_type_len = len;
  
   out:
+       tracing_selftest_running = false;
        mutex_unlock(&trace_types_lock);
+       lock_kernel();
  
        return ret;
  }
@@@ -564,6 -679,16 +679,16 @@@ void tracing_reset(struct trace_array *
        ftrace_enable_cpu();
  }
  
+ void tracing_reset_online_cpus(struct trace_array *tr)
+ {
+       int cpu;
+       tr->time_start = ftrace_now(tr->cpu);
+       for_each_online_cpu(cpu)
+               tracing_reset(tr, cpu);
+ }
  #define SAVED_CMDLINES 128
  static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
  static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
@@@ -581,6 -706,91 +706,91 @@@ static void trace_init_cmdlines(void
        cmdline_idx = 0;
  }
  
+ static int trace_stop_count;
+ static DEFINE_SPINLOCK(tracing_start_lock);
+ /**
+  * ftrace_off_permanent - disable all ftrace code permanently
+  *
+  * This should only be called when a serious anomaly has
+  * been detected.  This will turn off function tracing,
+  * ring buffers, and other tracing utilities. It takes no
+  * locks and can be called from any context.
+  */
+ void ftrace_off_permanent(void)
+ {
+       tracing_disabled = 1;
+       ftrace_stop();
+       tracing_off_permanent();
+ }
+ /**
+  * tracing_start - quick start of the tracer
+  *
+  * If tracing is enabled but was stopped by tracing_stop,
+  * this will start the tracer back up.
+  */
+ void tracing_start(void)
+ {
+       struct ring_buffer *buffer;
+       unsigned long flags;
+       if (tracing_disabled)
+               return;
+       spin_lock_irqsave(&tracing_start_lock, flags);
+       if (--trace_stop_count)
+               goto out;
+       if (trace_stop_count < 0) {
+               /* Someone screwed up their debugging */
+               WARN_ON_ONCE(1);
+               trace_stop_count = 0;
+               goto out;
+       }
+       buffer = global_trace.buffer;
+       if (buffer)
+               ring_buffer_record_enable(buffer);
+       buffer = max_tr.buffer;
+       if (buffer)
+               ring_buffer_record_enable(buffer);
+       ftrace_start();
+  out:
+       spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
+ /**
+  * tracing_stop - quick stop of the tracer
+  *
+  * Lightweight way to stop tracing. Use in conjunction with
+  * tracing_start.
+  */
+ void tracing_stop(void)
+ {
+       struct ring_buffer *buffer;
+       unsigned long flags;
+       ftrace_stop();
+       spin_lock_irqsave(&tracing_start_lock, flags);
+       if (trace_stop_count++)
+               goto out;
+       buffer = global_trace.buffer;
+       if (buffer)
+               ring_buffer_record_disable(buffer);
+       buffer = max_tr.buffer;
+       if (buffer)
+               ring_buffer_record_disable(buffer);
+  out:
+       spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
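
tracing_start() and tracing_stop() nest via trace_stop_count: only the first stop disables the ring buffers and only the matching final start re-enables them. A toy model of that counting scheme (locking and the ring-buffer enable/disable calls omitted):

#include <stdio.h>

static int stop_count;
static int recording = 1;

static void my_stop(void)
{
        if (stop_count++ == 0)
                recording = 0;
}

static void my_start(void)
{
        if (--stop_count > 0)
                return;                 /* an outer caller still holds a stop */
        if (stop_count < 0) {
                stop_count = 0;         /* unbalanced start, like the WARN above */
                return;
        }
        recording = 1;
}

int main(void)
{
        my_stop();
        my_stop();
        my_start();
        printf("recording=%d (still stopped)\n", recording);
        my_start();
        printf("recording=%d (running again)\n", recording);
        return 0;
}
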
  void trace_stop_cmdline_recording(void);
  
  static void trace_save_cmdline(struct task_struct *tsk)
        spin_unlock(&trace_cmdline_lock);
  }
  
- static char *trace_find_cmdline(int pid)
+ char *trace_find_cmdline(int pid)
  {
        char *cmdline = "<...>";
        unsigned map;
@@@ -655,6 -865,7 +865,7 @@@ tracing_generic_entry_update(struct tra
  
        entry->preempt_count            = pc & 0xff;
        entry->pid                      = (tsk) ? tsk->pid : 0;
+       entry->tgid                     = (tsk) ? tsk->tgid : 0;
        entry->flags =
  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@@ -691,6 -902,56 +902,56 @@@ trace_function(struct trace_array *tr, 
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
  }
  
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static void __trace_graph_entry(struct trace_array *tr,
+                               struct trace_array_cpu *data,
+                               struct ftrace_graph_ent *trace,
+                               unsigned long flags,
+                               int pc)
+ {
+       struct ring_buffer_event *event;
+       struct ftrace_graph_ent_entry *entry;
+       unsigned long irq_flags;
+       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+               return;
+       event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_GRAPH_ENT;
+       entry->graph_ent                        = *trace;
+       ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+ static void __trace_graph_return(struct trace_array *tr,
+                               struct trace_array_cpu *data,
+                               struct ftrace_graph_ret *trace,
+                               unsigned long flags,
+                               int pc)
+ {
+       struct ring_buffer_event *event;
+       struct ftrace_graph_ret_entry *entry;
+       unsigned long irq_flags;
+       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+               return;
+       event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type                 = TRACE_GRAPH_RET;
+       entry->ret                              = *trace;
+       ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+ #endif
  void
  ftrace(struct trace_array *tr, struct trace_array_cpu *data,
         unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@@ -742,6 -1003,46 +1003,46 @@@ void __trace_stack(struct trace_array *
        ftrace_trace_stack(tr, data, flags, skip, preempt_count());
  }
  
+ static void ftrace_trace_userstack(struct trace_array *tr,
+                  struct trace_array_cpu *data,
+                  unsigned long flags, int pc)
+ {
+ #ifdef CONFIG_STACKTRACE
+       struct ring_buffer_event *event;
+       struct userstack_entry *entry;
+       struct stack_trace trace;
+       unsigned long irq_flags;
+       if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+               return;
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, flags, pc);
+       entry->ent.type         = TRACE_USER_STACK;
+       memset(&entry->caller, 0, sizeof(entry->caller));
+       trace.nr_entries        = 0;
+       trace.max_entries       = FTRACE_STACK_ENTRIES;
+       trace.skip              = 0;
+       trace.entries           = entry->caller;
+       save_stack_trace_user(&trace);
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ #endif
+ }
+ void __trace_userstack(struct trace_array *tr,
+                  struct trace_array_cpu *data,
+                  unsigned long flags)
+ {
+       ftrace_trace_userstack(tr, data, flags, preempt_count());
+ }
  static void
  ftrace_trace_special(void *__tr, void *__data,
                     unsigned long arg1, unsigned long arg2, unsigned long arg3,
        entry->arg3                     = arg3;
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+       ftrace_trace_userstack(tr, data, irq_flags, pc);
  
        trace_wake_up();
  }
@@@ -803,6 -1105,7 +1105,7 @@@ tracing_sched_switch_trace(struct trace
        entry->next_cpu = task_cpu(next);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 5, pc);
+       ftrace_trace_userstack(tr, data, flags, pc);
  }
  
  void
@@@ -832,6 -1135,7 +1135,7 @@@ tracing_sched_wakeup_trace(struct trace
        entry->next_cpu                 = task_cpu(wakee);
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
        ftrace_trace_stack(tr, data, flags, 6, pc);
+       ftrace_trace_userstack(tr, data, flags, pc);
  
        trace_wake_up();
  }
@@@ -841,26 -1145,28 +1145,28 @@@ ftrace_special(unsigned long arg1, unsi
  {
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
+       unsigned long flags;
        int cpu;
        int pc;
  
-       if (tracing_disabled || !tr->ctrl)
+       if (tracing_disabled)
                return;
  
        pc = preempt_count();
-       preempt_disable_notrace();
+       local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
  
-       if (likely(!atomic_read(&data->disabled)))
+       if (likely(atomic_inc_return(&data->disabled) == 1))
                ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
  
-       preempt_enable_notrace();
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
  }
  
  #ifdef CONFIG_FUNCTION_TRACER
  static void
- function_trace_call(unsigned long ip, unsigned long parent_ip)
+ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
  {
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
                return;
  
        pc = preempt_count();
-       resched = need_resched();
-       preempt_disable_notrace();
+       resched = ftrace_preempt_disable();
        local_save_flags(flags);
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
                trace_function(tr, data, ip, parent_ip, flags, pc);
  
        atomic_dec(&data->disabled);
-       if (resched)
-               preempt_enable_no_resched_notrace();
-       else
-               preempt_enable_notrace();
+       ftrace_preempt_enable(resched);
  }
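
ftrace_preempt_disable()/ftrace_preempt_enable() used above are ftrace helpers that package exactly the open-coded sequence this hunk removes: remember whether need_resched() was already pending, then pick the matching notrace re-enable. A sketch of what the pair amounts to, reconstructed from the removed lines (the real definitions live in the tracing headers and may differ in detail):

static inline int ftrace_preempt_disable(void)
{
        int resched = need_resched();

        preempt_disable_notrace();
        return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
        if (resched)
                preempt_enable_no_resched_notrace();
        else
                preempt_enable_notrace();
}
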
  
+ static void
+ function_trace_call(unsigned long ip, unsigned long parent_ip)
+ {
+       struct trace_array *tr = &global_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu;
+       int pc;
+       if (unlikely(!ftrace_function_enabled))
+               return;
+       /*
+        * Need to use raw, since this must be called before the
+        * recursive protection is performed.
+        */
+       local_irq_save(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+       if (likely(disabled == 1)) {
+               pc = preempt_count();
+               trace_function(tr, data, ip, parent_ip, flags, pc);
+       }
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
+ }
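
function_trace_call() and the graph hooks below share the same reentrancy guard: bump a per-cpu "disabled" counter and only do work when this was the first (outermost) entry on that CPU. A single-threaded userspace model of the idea (the kernel uses a per-cpu atomic_t with interrupts disabled; this shows only the control flow):

#include <stdio.h>

static int disabled;
static int events;

static void trace_event(int depth)
{
        if (++disabled == 1) {
                events++;                        /* real work, once */
                if (depth)
                        trace_event(depth - 1);  /* nested entry is ignored */
        }
        --disabled;
}

int main(void)
{
        trace_event(3);
        printf("events recorded: %d\n", events); /* prints 1 */
        return 0;
}
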
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int trace_graph_entry(struct ftrace_graph_ent *trace)
+ {
+       struct trace_array *tr = &global_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu;
+       int pc;
+       if (!ftrace_trace_task(current))
+               return 0;
+       if (!ftrace_graph_addr(trace->func))
+               return 0;
+       local_irq_save(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+       if (likely(disabled == 1)) {
+               pc = preempt_count();
+               __trace_graph_entry(tr, data, trace, flags, pc);
+       }
+       /* Only do the atomic if it is not already set */
+       if (!test_tsk_trace_graph(current))
+               set_tsk_trace_graph(current);
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
+       return 1;
+ }
+ void trace_graph_return(struct ftrace_graph_ret *trace)
+ {
+       struct trace_array *tr = &global_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu;
+       int pc;
+       local_irq_save(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+       if (likely(disabled == 1)) {
+               pc = preempt_count();
+               __trace_graph_return(tr, data, trace, flags, pc);
+       }
+       if (!trace->depth)
+               clear_tsk_trace_graph(current);
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
+ }
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
  static struct ftrace_ops trace_ops __read_mostly =
  {
        .func = function_trace_call,
  void tracing_start_function_trace(void)
  {
        ftrace_function_enabled = 0;
+       if (trace_flags & TRACE_ITER_PREEMPTONLY)
+               trace_ops.func = function_trace_call_preempt_only;
+       else
+               trace_ops.func = function_trace_call;
        register_ftrace_function(&trace_ops);
-       if (tracer_enabled)
-               ftrace_function_enabled = 1;
+       ftrace_function_enabled = 1;
  }
  
  void tracing_stop_function_trace(void)
  
  enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
+       TRACE_FILE_ANNOTATE     = 2,
  };
  
  static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@@ -1047,10 -1443,6 +1443,6 @@@ static void *s_start(struct seq_file *m
  
        atomic_inc(&trace_record_cmdline_disabled);
  
-       /* let the tracer grab locks here if needed */
-       if (current_trace->start)
-               current_trace->start(iter);
        if (*pos != iter->pos) {
                iter->ent = NULL;
                iter->cpu = 0;
  
  static void s_stop(struct seq_file *m, void *p)
  {
-       struct trace_iterator *iter = m->private;
        atomic_dec(&trace_record_cmdline_disabled);
-       /* let the tracer release locks here if needed */
-       if (current_trace && current_trace == iter->trace && iter->trace->stop)
-               iter->trace->stop(iter);
        mutex_unlock(&trace_types_lock);
  }
  
@@@ -1143,7 -1528,7 +1528,7 @@@ seq_print_sym_offset(struct trace_seq *
  # define IP_FMT "%016lx"
  #endif
  
- static int
+ int
  seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
  {
        int ret;
        return ret;
  }
  
+ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+                                   unsigned long ip, unsigned long sym_flags)
+ {
+       struct file *file = NULL;
+       unsigned long vmstart = 0;
+       int ret = 1;
+       if (mm) {
+               const struct vm_area_struct *vma;
+               down_read(&mm->mmap_sem);
+               vma = find_vma(mm, ip);
+               if (vma) {
+                       file = vma->vm_file;
+                       vmstart = vma->vm_start;
+               }
+               if (file) {
+                       ret = trace_seq_path(s, &file->f_path);
+                       if (ret)
+                               ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
+               }
+               up_read(&mm->mmap_sem);
+       }
+       if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+               ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+       return ret;
+ }
+ static int
+ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+                     unsigned long sym_flags)
+ {
+       struct mm_struct *mm = NULL;
+       int ret = 1;
+       unsigned int i;
+       if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+               struct task_struct *task;
+               /*
+                * we do the lookup on the thread group leader,
+                * since individual threads might have already quit!
+                */
+               rcu_read_lock();
+               task = find_task_by_vpid(entry->ent.tgid);
+               if (task)
+                       mm = get_task_mm(task);
+               rcu_read_unlock();
+       }
+       for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+               unsigned long ip = entry->caller[i];
+               if (ip == ULONG_MAX || !ret)
+                       break;
+               if (i && ret)
+                       ret = trace_seq_puts(s, " <- ");
+               if (!ip) {
+                       if (ret)
+                               ret = trace_seq_puts(s, "??");
+                       continue;
+               }
+               if (!ret)
+                       break;
+               if (ret)
+                       ret = seq_print_user_ip(s, mm, ip, sym_flags);
+       }
+       if (mm)
+               mmput(mm);
+       return ret;
+ }
  static void print_lat_help_header(struct seq_file *m)
  {
        seq_puts(m, "#                  _------=> CPU#            \n");
@@@ -1301,6 -1758,13 +1758,13 @@@ lat_print_timestamp(struct trace_seq *s
  
  static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
  
+ static int task_state_char(unsigned long state)
+ {
+       int bit = state ? __ffs(state) + 1 : 0;
+       return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+ }
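
task_state_char() maps the lowest set state bit to a character from TASK_STATE_TO_CHAR_STR, falling back to '?'. A quick standalone illustration; the state string below mirrors the 2.6.28-era "RSDTtZX" and should be treated as an assumption, since the kernel definition can change:

#include <stdio.h>
#include <strings.h>    /* ffs() */

static const char state_to_char[] = "RSDTtZX";

static int task_state_char(unsigned long state)
{
        int bit = state ? ffs((int)state) : 0;   /* ffs() == __ffs() + 1 */

        return bit < (int)sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
}

int main(void)
{
        printf("%c %c %c\n",
               task_state_char(0),   /* running -> 'R' */
               task_state_char(1),   /* TASK_INTERRUPTIBLE -> 'S' */
               task_state_char(2));  /* TASK_UNINTERRUPTIBLE -> 'D' */
        return 0;
}
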
  /*
   * The message is supposed to contain an ending newline.
   * If the printing stops prematurely, try to add a newline of our own.
@@@ -1338,6 -1802,23 +1802,23 @@@ void trace_seq_print_cont(struct trace_
                trace_seq_putc(s, '\n');
  }
  
+ static void test_cpu_buff_start(struct trace_iterator *iter)
+ {
+       struct trace_seq *s = &iter->seq;
+       if (!(trace_flags & TRACE_ITER_ANNOTATE))
+               return;
+       if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
+               return;
+       if (cpu_isset(iter->cpu, iter->started))
+               return;
+       cpu_set(iter->cpu, iter->started);
+       trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+ }
  static enum print_line_t
  print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
  {
        char *comm;
        int S, T;
        int i;
-       unsigned state;
  
        if (entry->type == TRACE_CONT)
                return TRACE_TYPE_HANDLED;
  
+       test_cpu_buff_start(iter);
        next_entry = find_next_entry(iter, NULL, &next_ts);
        if (!next_entry)
                next_ts = iter->ts;
  
                trace_assign_type(field, entry);
  
-               T = field->next_state < sizeof(state_to_char) ?
-                       state_to_char[field->next_state] : 'X';
-               state = field->prev_state ?
-                       __ffs(field->prev_state) + 1 : 0;
-               S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+               T = task_state_char(field->next_state);
+               S = task_state_char(field->prev_state);
                comm = trace_find_cmdline(field->next_pid);
                trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
                                 field->prev_pid,
                        trace_seq_print_cont(s, iter);
                break;
        }
+       case TRACE_BRANCH: {
+               struct trace_branch *field;
+               trace_assign_type(field, entry);
+               trace_seq_printf(s, "[%s] %s:%s:%d\n",
+                                field->correct ? "  ok  " : " MISS ",
+                                field->func,
+                                field->file,
+                                field->line);
+               break;
+       }
+       case TRACE_USER_STACK: {
+               struct userstack_entry *field;
+               trace_assign_type(field, entry);
+               seq_print_userip_objs(field, s, sym_flags);
+               trace_seq_putc(s, '\n');
+               break;
+       }
        default:
                trace_seq_printf(s, "Unknown type %d\n", entry->type);
        }
@@@ -1472,6 -1971,8 +1971,8 @@@ static enum print_line_t print_trace_fm
        if (entry->type == TRACE_CONT)
                return TRACE_TYPE_HANDLED;
  
+       test_cpu_buff_start(iter);
        comm = trace_find_cmdline(iter->ent->pid);
  
        t = ns2usecs(iter->ts);
  
                trace_assign_type(field, entry);
  
-               S = field->prev_state < sizeof(state_to_char) ?
-                       state_to_char[field->prev_state] : 'X';
-               T = field->next_state < sizeof(state_to_char) ?
-                       state_to_char[field->next_state] : 'X';
+               T = task_state_char(field->next_state);
+               S = task_state_char(field->prev_state);
                ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
                                       field->prev_pid,
                                       field->prev_prio,
                        trace_seq_print_cont(s, iter);
                break;
        }
+       case TRACE_GRAPH_RET: {
+               return print_graph_function(iter);
+       }
+       case TRACE_GRAPH_ENT: {
+               return print_graph_function(iter);
+       }
+       case TRACE_BRANCH: {
+               struct trace_branch *field;
+               trace_assign_type(field, entry);
+               trace_seq_printf(s, "[%s] %s:%s:%d\n",
+                                field->correct ? "  ok  " : " MISS ",
+                                field->func,
+                                field->file,
+                                field->line);
+               break;
+       }
+       case TRACE_USER_STACK: {
+               struct userstack_entry *field;
+               trace_assign_type(field, entry);
+               ret = seq_print_userip_objs(field, s, sym_flags);
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+               ret = trace_seq_putc(s, '\n');
+               if (!ret)
+                       return TRACE_TYPE_PARTIAL_LINE;
+               break;
+       }
        }
        return TRACE_TYPE_HANDLED;
  }
@@@ -1621,12 -2151,9 +2151,9 @@@ static enum print_line_t print_raw_fmt(
  
                trace_assign_type(field, entry);
  
-               S = field->prev_state < sizeof(state_to_char) ?
-                       state_to_char[field->prev_state] : 'X';
-               T = field->next_state < sizeof(state_to_char) ?
-                       state_to_char[field->next_state] : 'X';
-               if (entry->type == TRACE_WAKE)
-                       S = '+';
+               T = task_state_char(field->next_state);
+               S = entry->type == TRACE_WAKE ? '+' :
+                       task_state_char(field->prev_state);
                ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
                                       field->prev_pid,
                                       field->prev_prio,
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
@@@ -1712,12 -2240,9 +2240,9 @@@ static enum print_line_t print_hex_fmt(
  
                trace_assign_type(field, entry);
  
-               S = field->prev_state < sizeof(state_to_char) ?
-                       state_to_char[field->prev_state] : 'X';
-               T = field->next_state < sizeof(state_to_char) ?
-                       state_to_char[field->next_state] : 'X';
-               if (entry->type == TRACE_WAKE)
-                       S = '+';
+               T = task_state_char(field->next_state);
+               S = entry->type == TRACE_WAKE ? '+' :
+                       task_state_char(field->prev_state);
                SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
                SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
                SEQ_PUT_HEX_FIELD_RET(s, S);
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
        return TRACE_TYPE_HANDLED;
  }
  
+ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
+ {
+       struct trace_seq *s = &iter->seq;
+       struct trace_entry *entry = iter->ent;
+       struct print_entry *field;
+       int ret;
+       trace_assign_type(field, entry);
+       ret = trace_seq_printf(s, field->buf);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+       if (entry->flags & TRACE_FLAG_CONT)
+               trace_seq_print_cont(s, iter);
+       return TRACE_TYPE_HANDLED;
+ }
  static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
  {
        struct trace_seq *s = &iter->seq;
                break;
        }
        case TRACE_SPECIAL:
+       case TRACE_USER_STACK:
        case TRACE_STACK: {
                struct special_entry *field;
  
@@@ -1823,6 -2369,11 +2369,11 @@@ static enum print_line_t print_trace_li
                        return ret;
        }
  
+       if (iter->ent->type == TRACE_PRINT &&
+                       trace_flags & TRACE_ITER_PRINTK &&
+                       trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+               return print_printk_msg_only(iter);
        if (trace_flags & TRACE_ITER_BIN)
                return print_bin_fmt(iter);
  
@@@ -1847,7 -2398,9 +2398,9 @@@ static int s_show(struct seq_file *m, v
                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
                        seq_puts(m, "#\n");
                }
-               if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+               if (iter->trace && iter->trace->print_header)
+                       iter->trace->print_header(m);
+               else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
                        /* print nothing if the buffers are empty */
                        if (trace_empty(iter))
                                return 0;
@@@ -1899,6 -2452,15 +2452,15 @@@ __tracing_open(struct inode *inode, str
        iter->trace = current_trace;
        iter->pos = -1;
  
+       /* Notify the tracer early; before we stop tracing. */
+       if (iter->trace && iter->trace->open)
+               iter->trace->open(iter);
+       /* Annotate start of buffers if we had overruns */
+       if (ring_buffer_overruns(iter->tr->buffer))
+               iter->iter_flags |= TRACE_FILE_ANNOTATE;
        for_each_tracing_cpu(cpu) {
  
                iter->buffer_iter[cpu] =
        m->private = iter;
  
        /* stop the trace while dumping */
-       if (iter->tr->ctrl) {
-               tracer_enabled = 0;
-               ftrace_function_enabled = 0;
-       }
-       if (iter->trace && iter->trace->open)
-                       iter->trace->open(iter);
+       tracing_stop();
  
        mutex_unlock(&trace_types_lock);
  
@@@ -1966,14 -2522,7 +2522,7 @@@ int tracing_release(struct inode *inode
                iter->trace->close(iter);
  
        /* reenable tracing if it was previously enabled */
-       if (iter->tr->ctrl) {
-               tracer_enabled = 1;
-               /*
-                * It is safe to enable function tracing even if it
-                * isn't used
-                */
-               ftrace_function_enabled = 1;
-       }
+       tracing_start();
        mutex_unlock(&trace_types_lock);
  
        seq_release(inode, file);
@@@ -2126,7 -2675,7 +2675,7 @@@ tracing_cpumask_read(struct file *filp
  
        mutex_lock(&tracing_cpumask_update_lock);
  
 -      len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
 +      len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
        if (count - len < 2) {
                count = -EINVAL;
                goto out_err;
@@@ -2147,11 -2696,11 +2696,11 @@@ tracing_cpumask_write(struct file *filp
        int err, cpu;
  
        mutex_lock(&tracing_cpumask_update_lock);
 -      err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 +      err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
        if (err)
                goto err_unlock;
  
-       raw_local_irq_disable();
+       local_irq_disable();
        __raw_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
                /*
                }
        }
        __raw_spin_unlock(&ftrace_max_lock);
-       raw_local_irq_enable();
+       local_irq_enable();
  
        tracing_cpumask = tracing_cpumask_new;
  
@@@ -2189,13 -2738,16 +2738,16 @@@ static struct file_operations tracing_c
  };
  
  static ssize_t
- tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+ tracing_trace_options_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
  {
+       int i;
        char *buf;
        int r = 0;
        int len = 0;
-       int i;
+       u32 tracer_flags = current_trace->flags->val;
+       struct tracer_opt *trace_opts = current_trace->flags->opts;
  
        /* calculate max size */
        for (i = 0; trace_options[i]; i++) {
                len += 3; /* "no" and space */
        }
  
+       /*
+        * Increase the size to account for the names of options
+        * specific to the current tracer.
+        */
+       for (i = 0; trace_opts[i].name; i++) {
+               len += strlen(trace_opts[i].name);
+               len += 3; /* "no" and space */
+       }
        /* +2 for \n and \0 */
        buf = kmalloc(len + 2, GFP_KERNEL);
        if (!buf)
                        r += sprintf(buf + r, "no%s ", trace_options[i]);
        }
  
+       for (i = 0; trace_opts[i].name; i++) {
+               if (tracer_flags & trace_opts[i].bit)
+                       r += sprintf(buf + r, "%s ",
+                               trace_opts[i].name);
+               else
+                       r += sprintf(buf + r, "no%s ",
+                               trace_opts[i].name);
+       }
        r += sprintf(buf + r, "\n");
        WARN_ON(r >= len + 2);
  
        return r;
  }
  
+ /* Try to assign a tracer specific option */
+ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+ {
+       struct tracer_flags *trace_flags = trace->flags;
+       struct tracer_opt *opts = NULL;
+       int ret = 0, i = 0;
+       int len;
+       for (i = 0; trace_flags->opts[i].name; i++) {
+               opts = &trace_flags->opts[i];
+               len = strlen(opts->name);
+               if (strncmp(cmp, opts->name, len) == 0) {
+                       ret = trace->set_flag(trace_flags->val,
+                               opts->bit, !neg);
+                       break;
+               }
+       }
+       /* Not found */
+       if (!trace_flags->opts[i].name)
+               return -EINVAL;
+       /* Refused to handle */
+       if (ret)
+               return ret;
+       if (neg)
+               trace_flags->val &= ~opts->bit;
+       else
+               trace_flags->val |= opts->bit;
+       return 0;
+ }
  static ssize_t
- tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
  {
        char buf[64];
        char *cmp = buf;
        int neg = 0;
+       int ret;
        int i;
  
        if (cnt >= sizeof(buf))
                        break;
                }
        }
-       /*
-        * If no option could be set, return an error:
-        */
-       if (!trace_options[i])
-               return -EINVAL;
+       /* If no option could be set, test the specific tracer options */
+       if (!trace_options[i]) {
+               ret = set_tracer_option(current_trace, cmp, neg);
+               if (ret)
+                       return ret;
+       }
  
        filp->f_pos += cnt;
  
  
  static struct file_operations tracing_iter_fops = {
        .open           = tracing_open_generic,
-       .read           = tracing_iter_ctrl_read,
-       .write          = tracing_iter_ctrl_write,
+       .read           = tracing_trace_options_read,
+       .write          = tracing_trace_options_write,
  };
  
  static const char readme_msg[] =
        "# echo sched_switch > /debug/tracing/current_tracer\n"
        "# cat /debug/tracing/current_tracer\n"
        "sched_switch\n"
-       "# cat /debug/tracing/iter_ctrl\n"
+       "# cat /debug/tracing/trace_options\n"
        "noprint-parent nosym-offset nosym-addr noverbose\n"
-       "# echo print-parent > /debug/tracing/iter_ctrl\n"
+       "# echo print-parent > /debug/tracing/trace_options\n"
        "# echo 1 > /debug/tracing/tracing_enabled\n"
        "# cat /debug/tracing/trace > /tmp/trace.txt\n"
        "echo 0 > /debug/tracing/tracing_enabled\n"
@@@ -2311,11 -2918,10 +2918,10 @@@ static ssize_
  tracing_ctrl_read(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
  {
        char buf[64];
        int r;
  
-       r = sprintf(buf, "%ld\n", tr->ctrl);
+       r = sprintf(buf, "%u\n", tracer_enabled);
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
@@@ -2343,16 -2949,18 +2949,18 @@@ tracing_ctrl_write(struct file *filp, c
        val = !!val;
  
        mutex_lock(&trace_types_lock);
-       if (tr->ctrl ^ val) {
-               if (val)
+       if (tracer_enabled ^ val) {
+               if (val) {
                        tracer_enabled = 1;
-               else
+                       if (current_trace->start)
+                               current_trace->start(tr);
+                       tracing_start();
+               } else {
                        tracer_enabled = 0;
-               tr->ctrl = val;
-               if (current_trace && current_trace->ctrl_update)
-                       current_trace->ctrl_update(tr);
+                       tracing_stop();
+                       if (current_trace->stop)
+                               current_trace->stop(tr);
+               }
        }
        mutex_unlock(&trace_types_lock);
  
@@@ -2378,29 -2986,11 +2986,11 @@@ tracing_set_trace_read(struct file *fil
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
- static ssize_t
- tracing_set_trace_write(struct file *filp, const char __user *ubuf,
-                       size_t cnt, loff_t *ppos)
+ static int tracing_set_tracer(char *buf)
  {
        struct trace_array *tr = &global_trace;
        struct tracer *t;
-       char buf[max_tracer_type_len+1];
-       int i;
-       size_t ret;
-       ret = cnt;
-       if (cnt > max_tracer_type_len)
-               cnt = max_tracer_type_len;
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-       buf[cnt] = 0;
-       /* strip ending whitespace. */
-       for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
-               buf[i] = 0;
+       int ret = 0;
  
        mutex_lock(&trace_types_lock);
        for (t = trace_types; t; t = t->next) {
        if (t == current_trace)
                goto out;
  
+       trace_branch_disable();
        if (current_trace && current_trace->reset)
                current_trace->reset(tr);
  
        current_trace = t;
-       if (t->init)
-               t->init(tr);
+       if (t->init) {
+               ret = t->init(tr);
+               if (ret)
+                       goto out;
+       }
  
+       trace_branch_enable(tr);
   out:
        mutex_unlock(&trace_types_lock);
  
-       if (ret > 0)
-               filp->f_pos += ret;
+       return ret;
+ }
+ static ssize_t
+ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+                       size_t cnt, loff_t *ppos)
+ {
+       char buf[max_tracer_type_len+1];
+       int i;
+       size_t ret;
+       int err;
+       ret = cnt;
+       if (cnt > max_tracer_type_len)
+               cnt = max_tracer_type_len;
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+       buf[cnt] = 0;
+       /* strip ending whitespace. */
+       for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+               buf[i] = 0;
+       err = tracing_set_tracer(buf);
+       if (err)
+               return err;
+       filp->f_pos += ret;
  
        return ret;
  }
@@@ -2492,6 -3116,10 +3116,10 @@@ static int tracing_open_pipe(struct ino
                return -ENOMEM;
  
        mutex_lock(&trace_types_lock);
+       /* trace pipe does not show start of buffer */
+       cpus_setall(iter->started);
        iter->tr = &global_trace;
        iter->trace = current_trace;
        filp->private_data = iter;
@@@ -2667,7 -3295,7 +3295,7 @@@ tracing_entries_read(struct file *filp
        char buf[64];
        int r;
  
-       r = sprintf(buf, "%lu\n", tr->entries);
+       r = sprintf(buf, "%lu\n", tr->entries >> 10);
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
@@@ -2678,7 -3306,6 +3306,6 @@@ tracing_entries_write(struct file *filp
        unsigned long val;
        char buf[64];
        int ret, cpu;
-       struct trace_array *tr = filp->private_data;
  
        if (cnt >= sizeof(buf))
                return -EINVAL;
  
        mutex_lock(&trace_types_lock);
  
-       if (tr->ctrl) {
-               cnt = -EBUSY;
-               pr_info("ftrace: please disable tracing"
-                       " before modifying buffer size\n");
-               goto out;
-       }
+       tracing_stop();
  
        /* disable all cpu buffers */
        for_each_tracing_cpu(cpu) {
                        atomic_inc(&max_tr.data[cpu]->disabled);
        }
  
+       /* value is in KB */
+       val <<= 10;
        if (val != global_trace.entries) {
                ret = ring_buffer_resize(global_trace.buffer, val);
                if (ret < 0) {
                        atomic_dec(&max_tr.data[cpu]->disabled);
        }
  
+       tracing_start();
        max_tr.entries = global_trace.entries;
        mutex_unlock(&trace_types_lock);
  
@@@ -2762,7 -3388,7 +3388,7 @@@ static int mark_printk(const char *fmt
        int ret;
        va_list args;
        va_start(args, fmt);
-       ret = trace_vprintk(0, fmt, args);
+       ret = trace_vprintk(0, -1, fmt, args);
        va_end(args);
        return ret;
  }
@@@ -2773,9 -3399,8 +3399,8 @@@ tracing_mark_write(struct file *filp, c
  {
        char *buf;
        char *end;
-       struct trace_array *tr = &global_trace;
  
-       if (!tr->ctrl || tracing_disabled)
+       if (tracing_disabled)
                return -EINVAL;
  
        if (cnt > TRACE_BUF_SIZE)
@@@ -2841,22 -3466,38 +3466,38 @@@ static struct file_operations tracing_m
  
  #ifdef CONFIG_DYNAMIC_FTRACE
  
+ int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+ {
+       return 0;
+ }
  static ssize_t
- tracing_read_long(struct file *filp, char __user *ubuf,
+ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
  {
+       static char ftrace_dyn_info_buffer[1024];
+       static DEFINE_MUTEX(dyn_info_mutex);
        unsigned long *p = filp->private_data;
-       char buf[64];
+       char *buf = ftrace_dyn_info_buffer;
+       int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
        int r;
  
-       r = sprintf(buf, "%ld\n", *p);
+       mutex_lock(&dyn_info_mutex);
+       r = sprintf(buf, "%ld ", *p);
  
-       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+       r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+       buf[r++] = '\n';
+       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+       mutex_unlock(&dyn_info_mutex);
+       return r;
  }
  
- static struct file_operations tracing_read_long_fops = {
+ static struct file_operations tracing_dyn_info_fops = {
        .open           = tracing_open_generic,
-       .read           = tracing_read_long,
+       .read           = tracing_read_dyn_info,
  };
  #endif
  
@@@ -2897,10 -3538,10 +3538,10 @@@ static __init int tracer_init_debugfs(v
        if (!entry)
                pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
  
-       entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+       entry = debugfs_create_file("trace_options", 0644, d_tracer,
                                    NULL, &tracing_iter_fops);
        if (!entry)
-               pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+               pr_warning("Could not create debugfs 'trace_options' entry\n");
  
        entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
                                    NULL, &tracing_cpumask_fops);
                pr_warning("Could not create debugfs "
                           "'trace_pipe' entry\n");
  
-       entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+       entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
                                    &global_trace, &tracing_entries_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
-                          "'trace_entries' entry\n");
+                          "'buffer_size_kb' entry\n");
  
        entry = debugfs_create_file("trace_marker", 0220, d_tracer,
                                    NULL, &tracing_mark_fops);
  #ifdef CONFIG_DYNAMIC_FTRACE
        entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                                    &ftrace_update_tot_cnt,
-                                   &tracing_read_long_fops);
+                                   &tracing_dyn_info_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'dyn_ftrace_total_info' entry\n");
        return 0;
  }
  
- int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
  {
        static DEFINE_SPINLOCK(trace_buf_lock);
        static char trace_buf[TRACE_BUF_SIZE];
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
-       struct print_entry *entry;
-       unsigned long flags, irq_flags;
        int cpu, len = 0, size, pc;
+       struct print_entry *entry;
+       unsigned long irq_flags;
  
-       if (!tr->ctrl || tracing_disabled)
+       if (tracing_disabled || tracing_selftest_running)
                return 0;
  
        pc = preempt_count();
        if (unlikely(atomic_read(&data->disabled)))
                goto out;
  
-       spin_lock_irqsave(&trace_buf_lock, flags);
+       pause_graph_tracing();
+       spin_lock_irqsave(&trace_buf_lock, irq_flags);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
  
        len = min(len, TRACE_BUF_SIZE-1);
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
-       tracing_generic_entry_update(&entry->ent, flags, pc);
+       tracing_generic_entry_update(&entry->ent, irq_flags, pc);
        entry->ent.type                 = TRACE_PRINT;
        entry->ip                       = ip;
+       entry->depth                    = depth;
  
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
        ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
  
   out_unlock:
-       spin_unlock_irqrestore(&trace_buf_lock, flags);
+       spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+       unpause_graph_tracing();
   out:
        preempt_enable_notrace();
  
@@@ -3037,7 -3680,7 +3680,7 @@@ int __ftrace_printk(unsigned long ip, c
                return 0;
  
        va_start(ap, fmt);
-       ret = trace_vprintk(ip, fmt, ap);
+       ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
        va_end(ap);
        return ret;
  }
@@@ -3046,7 -3689,8 +3689,8 @@@ EXPORT_SYMBOL_GPL(__ftrace_printk)
  static int trace_panic_handler(struct notifier_block *this,
                               unsigned long event, void *unused)
  {
-       ftrace_dump();
+       if (ftrace_dump_on_oops)
+               ftrace_dump();
        return NOTIFY_OK;
  }
  
@@@ -3062,7 -3706,8 +3706,8 @@@ static int trace_die_handler(struct not
  {
        switch (val) {
        case DIE_OOPS:
-               ftrace_dump();
+               if (ftrace_dump_on_oops)
+                       ftrace_dump();
                break;
        default:
                break;
@@@ -3103,7 -3748,6 +3748,6 @@@ trace_printk_seq(struct trace_seq *s
        trace_seq_reset(s);
  }
  
  void ftrace_dump(void)
  {
        static DEFINE_SPINLOCK(ftrace_dump_lock);
                atomic_inc(&global_trace.data[cpu]->disabled);
        }
  
+       /* don't look at user memory in panic mode */
+       trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
        printk(KERN_TRACE "Dumping ftrace buffer:\n");
  
        iter.tr = &global_trace;
@@@ -3221,7 -3868,6 +3868,6 @@@ __init static int tracer_alloc_buffers(
  #endif
  
        /* All seems OK, enable tracing */
-       global_trace.ctrl = tracer_enabled;
        tracing_disabled = 0;
  
        atomic_notifier_chain_register(&panic_notifier_list,
diff --combined lib/Kconfig
@@@ -64,6 -64,8 +64,8 @@@ config CRC
  
  config LIBCRC32C
        tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
+       select CRYPTO
+       select CRYPTO_CRC32C
        help
          This option is provided for the case where no in-kernel-tree
          modules require CRC32c functions, but a module built outside the
@@@ -157,11 -159,4 +159,11 @@@ config CHECK_SIGNATUR
  config HAVE_LMB
        boolean
  
 +config CPUMASK_OFFSTACK
 +      bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
 +      help
 +        Use dynamic allocation for cpumask_var_t, instead of putting
 +        them on the stack.  This is a bit more expensive, but avoids
 +        stack overflow.
 +
  endmenu
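
For context on the CPUMASK_OFFSTACK entry above: it only changes how cpumask_var_t is backed, so callers always go through the alloc/free helpers. A hedged kernel-style sketch of that calling convention, assuming the 2.6.28-era cpumask API (alloc_cpumask_var and friends); this is not code from this merge, and the function name is example-only:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* With CPUMASK_OFFSTACK=n the "allocation" is a no-op and the mask
 * lives on the stack; with =y, alloc_cpumask_var() kmallocs it. */
static int count_online_cpus_example(void)
{
        cpumask_var_t mask;
        int n;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(mask, cpu_online_mask);
        n = cpumask_weight(mask);

        free_cpumask_var(mask);
        return n;
}
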