default y
select HAVE_IDE
select HAVE_OPROFILE
+ select INIT_ALL_POSSIBLE
config SBUS
bool
bool
default y
- config SCHED_NO_NO_OMIT_FRAME_POINTER
+ config SCHED_OMIT_FRAME_POINTER
bool
default y
#define DBG(fmt...)
#endif
- int smp_hw_index[NR_CPUS];
struct thread_info *secondary_ti;
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
}
}
+ static irqreturn_t call_function_action(int irq, void *data)
+ {
+ generic_smp_call_function_interrupt();
+ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t reschedule_action(int irq, void *data)
+ {
+ /* we just need the return path side effect of checking need_resched */
+ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t call_function_single_action(int irq, void *data)
+ {
+ generic_smp_call_function_single_interrupt();
+ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t debug_ipi_action(int irq, void *data)
+ {
+ smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+ return IRQ_HANDLED;
+ }
+
+ static irq_handler_t smp_ipi_action[] = {
+ [PPC_MSG_CALL_FUNCTION] = call_function_action,
+ [PPC_MSG_RESCHEDULE] = reschedule_action,
+ [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+ };
+
+ const char *smp_ipi_name[] = {
+ [PPC_MSG_CALL_FUNCTION] = "ipi call function",
+ [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+ [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ };
+
+ /* optional function to request ipi, for controllers with >= 4 ipis */
+ int smp_request_message_ipi(int virq, int msg)
+ {
+ int err;
+
+ if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ return -EINVAL;
+ }
+ #if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+ if (msg == PPC_MSG_DEBUGGER_BREAK) {
+ return 1;
+ }
+ #endif
+ err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
+ smp_ipi_name[msg], 0);
+ WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+ virq, smp_ipi_name[msg], err);
+
+ return err;
+ }
+
void smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
static struct device_node *cpu_to_l2cache(int cpu)
{
struct device_node *np;
- const phandle *php;
- phandle ph;
+ struct device_node *cache;
if (!cpu_present(cpu))
return NULL;
if (np == NULL)
return NULL;
- php = of_get_property(np, "l2-cache", NULL);
- if (php == NULL)
- return NULL;
- ph = *php;
+ cache = of_find_next_cache_node(np);
+
of_node_put(np);
- return of_find_node_by_phandle(ph);
+ return cache;
}
/* Activate a secondary processor. */
static unsigned tb_to_ns_shift __read_mostly;
static unsigned long boot_tb __read_mostly;
- static struct gettimeofday_struct do_gtod;
-
extern struct timezone sys_tz;
static long timezone_offset;
}
EXPORT_SYMBOL(udelay);
static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
u64 new_tb_to_xs)
{
- unsigned temp_idx;
- struct gettimeofday_vars *temp_varp;
-
- temp_idx = (do_gtod.var_idx == 0);
- temp_varp = &do_gtod.vars[temp_idx];
-
- temp_varp->tb_to_xs = new_tb_to_xs;
- temp_varp->tb_orig_stamp = new_tb_stamp;
- temp_varp->stamp_xsec = new_stamp_xsec;
- smp_mb();
- do_gtod.varp = temp_varp;
- do_gtod.var_idx = temp_idx;
-
/*
* tb_update_count is used to allow the userspace gettimeofday code
* to assure itself that it sees a consistent view of the tb_to_xs and
vdso_data->tb_to_xs = new_tb_to_xs;
vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ vdso_data->stamp_xtime = xtime;
smp_wmb();
++(vdso_data->tb_update_count);
}
tb_ticks_per_sec = new_tb_ticks_per_sec;
calc_cputime_factors();
div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
- do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
- do_gtod.varp->tb_to_xs = tb_to_xs;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
vdso_data->tb_to_xs = tb_to_xs;
}
struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
*dec = decrementer_clockevent;
- dec->cpumask = cpumask_of_cpu(cpu);
+ dec->cpumask = cpumask_of(cpu);
printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
sys_tz.tz_dsttime = 0;
}
- do_gtod.varp = &do_gtod.vars[0];
- do_gtod.var_idx = 0;
- do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
- __get_cpu_var(last_jiffy) = tb_last_jiffy;
- do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
- do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
- do_gtod.varp->tb_to_xs = tb_to_xs;
- do_gtod.tb_to_us = tb_to_us;
-
vdso_data->tb_orig_stamp = tb_last_jiffy;
vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
lpar_xirr_info_set((0xff << 24) | irq);
}
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
{
unsigned int irq;
int status;
int i, j;
struct device_node *np;
u32 ilen;
- const u32 *ireg, *isize;
+ const u32 *ireg;
u32 hcpuid;
/* Find the server numbers for the boot cpu. */
}
}
- /* get the bit size of server numbers */
- isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
- if (isize)
- interrupt_server_size = *isize;
-
of_node_put(np);
}
struct device_node *np;
u32 indx = 0;
int found = 0;
+ const u32 *isize;
ppc64_boot_msg(0x20, "XICS Init");
if (found == 0)
return;
+ /* get the bit size of server numbers */
+ found = 0;
+
+ for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
+ isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+
+ if (!isize)
+ continue;
+
+ if (!found) {
+ interrupt_server_size = *isize;
+ found = 1;
+ } else if (*isize != interrupt_server_size) {
+ printk(KERN_WARNING "XICS: "
+ "mismatched ibm,interrupt-server#-size\n");
+ interrupt_server_size = max(*isize,
+ interrupt_server_size);
+ }
+ }
+
xics_update_irq_servers();
xics_init_host();
/* Have the calling processor join or leave the specified global queue */
static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
{
- int status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE,
- (1UL << interrupt_server_size) - 1 - gserver, join);
- WARN_ON(status < 0);
+ int index;
+ int status;
+
+ if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+ return;
+
+ index = (1UL << interrupt_server_size) - 1 - gserver;
+
+ status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+ WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+ GLOBAL_INTERRUPT_QUEUE, index, join, status);
}
void xics_setup_cpu(void)
/* Reset affinity to all cpus */
irq_desc[virq].affinity = CPU_MASK_ALL;
- desc->chip->set_affinity(virq, CPU_MASK_ALL);
+ desc->chip->set_affinity(virq, cpu_all_mask);
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
}
(void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
}
- #ifdef CONFIG_SMP
- static irqreturn_t mpic_ipi_action(int irq, void *data)
- {
- long ipi = (long)data;
-
- smp_message_recv(ipi);
-
- return IRQ_HANDLED;
- }
- #endif /* CONFIG_SMP */
-
/*
* Linux descriptor level callbacks
*/
#endif /* CONFIG_SMP */
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = mpic_irq_to_hw(irq);
} else {
cpumask_t tmp;
- cpus_and(tmp, cpumask, cpu_online_map);
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
mpic_physmask(cpus_addr(tmp)[0]));
void mpic_request_ipis(void)
{
struct mpic *mpic = mpic_primary;
- long i, err;
- static char *ipi_names[] = {
- "IPI0 (call function)",
- "IPI1 (reschedule)",
- "IPI2 (call function single)",
- "IPI3 (debugger break)",
- };
+ int i;
BUG_ON(mpic == NULL);
printk(KERN_INFO "mpic: requesting IPIs ... \n");
unsigned int vipi = irq_create_mapping(mpic->irqhost,
mpic->ipi_vecs[0] + i);
if (vipi == NO_IRQ) {
- printk(KERN_ERR "Failed to map IPI %ld\n", i);
- break;
- }
- err = request_irq(vipi, mpic_ipi_action,
- IRQF_DISABLED|IRQF_PERCPU,
- ipi_names[i], (void *)i);
- if (err) {
- printk(KERN_ERR "Request of irq %d for IPI %ld failed\n",
- vipi, i);
- break;
+ printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]);
+ continue;
}
+ smp_request_message_ipi(vipi, i);
}
}
config GENERIC_TIME
def_bool y
+ config GENERIC_TIME_VSYSCALL
+ def_bool y
+
config GENERIC_CLOCKEVENTS
def_bool y
bool
default y if KVM
+ config VIRT_CPU_ACCOUNTING
+ def_bool y
+
mainmenu "Linux Kernel Configuration"
config S390
def_bool y
+ select USE_GENERIC_SMP_HELPERS if SMP
+ select HAVE_FUNCTION_TRACER
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_KVM if 64BIT
select HAVE_ARCH_TRACEHOOK
+ select INIT_ALL_POSSIBLE
source "init/Kconfig"
Class (z9 BC). The kernel will be slightly faster but will not
work on older machines such as the z990, z890, z900, and z800.
+ config MARCH_Z10
+ bool "IBM System z10"
+ help
+ Select this to enable optimizations for IBM System z10. The
+ kernel will be slightly faster but will not work on older
+ machines such as the z990, z890, z900, z800, z9-109, z9-ec
+ and z9-bc.
+
endchoice
config PACK_STACK
If unsure, say Y.
- config QDIO_DEBUG
- bool "Extended debugging information"
- depends on QDIO
- help
- Say Y here to get extended debugging output in
- /sys/kernel/debug/s390dbf/qdio...
- Warning: this option reduces the performance of the QDIO module.
-
- If unsure, say N.
-
config CHSC_SCH
tristate "Support for CHSC subchannels"
help
hypervisor. The ESSA instruction is used to do the states
changes between a page that has content and the unused state.
- config VIRT_TIMER
- bool "Virtual CPU timer support"
- help
- This provides a kernel interface for virtual CPU timers.
- Default is disabled.
-
- config VIRT_CPU_ACCOUNTING
- bool "Base user process accounting on virtual cpu timer"
- depends on VIRT_TIMER
- help
- Select this option to use CPU timer deltas to do user
- process accounting.
-
config APPLDATA_BASE
bool "Linux - VM Monitor Stream, base infrastructure"
- depends on PROC_FS && VIRT_TIMER=y
+ depends on PROC_FS
help
This provides a kernel interface for creating and updating z/VM APPLDATA
monitor records. The monitor records are updated at certain time
* cpu_number_map in other architectures.
*/
+ #define KMSG_COMPONENT "cpu"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-
static struct task_struct *current_set[NR_CPUS];
static u8 smp_cpu_type;
static void smp_ext_bitcall(int, ec_bit_sig);
- /*
- * Structure and data for __smp_call_function_map(). This is designed to
- * minimise static memory requirements. It also looks cleaner.
- */
- static DEFINE_SPINLOCK(call_lock);
-
- struct call_data_struct {
- void (*func) (void *info);
- void *info;
- cpumask_t started;
- cpumask_t finished;
- int wait;
- };
-
- static struct call_data_struct *call_data;
-
- /*
- * 'Call function' interrupt callback
- */
- static void do_call_function(void)
- {
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- cpu_set(smp_processor_id(), call_data->started);
- (*func)(info);
- if (wait)
- cpu_set(smp_processor_id(), call_data->finished);;
- }
-
- static void __smp_call_function_map(void (*func) (void *info), void *info,
- int wait, cpumask_t map)
- {
- struct call_data_struct data;
- int cpu, local = 0;
-
- /*
- * Can deadlock when interrupts are disabled or if in wrong context.
- */
- WARN_ON(irqs_disabled() || in_irq());
-
- /*
- * Check for local function call. We have to have the same call order
- * as in on_each_cpu() because of machine_restart_smp().
- */
- if (cpu_isset(smp_processor_id(), map)) {
- local = 1;
- cpu_clear(smp_processor_id(), map);
- }
-
- cpus_and(map, map, cpu_online_map);
- if (cpus_empty(map))
- goto out;
-
- data.func = func;
- data.info = info;
- data.started = CPU_MASK_NONE;
- data.wait = wait;
- if (wait)
- data.finished = CPU_MASK_NONE;
-
- call_data = &data;
-
- for_each_cpu_mask(cpu, map)
- smp_ext_bitcall(cpu, ec_call_function);
-
- /* Wait for response */
- while (!cpus_equal(map, data.started))
- cpu_relax();
- if (wait)
- while (!cpus_equal(map, data.finished))
- cpu_relax();
- out:
- if (local) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- }
- }
-
- /*
- * smp_call_function:
- * @func: the function to run; this must be fast and non-blocking
- * @info: an arbitrary pointer to pass to the function
- * @wait: if true, wait (atomically) until function has completed on other CPUs
- *
- * Run a function on all other CPUs.
- *
- * You must not call this function with disabled interrupts, from a
- * hardware interrupt handler or from a bottom half.
- */
- int smp_call_function(void (*func) (void *info), void *info, int wait)
- {
- cpumask_t map;
-
- spin_lock(&call_lock);
- map = cpu_online_map;
- cpu_clear(smp_processor_id(), map);
- __smp_call_function_map(func, info, wait, map);
- spin_unlock(&call_lock);
- return 0;
- }
- EXPORT_SYMBOL(smp_call_function);
-
- /*
- * smp_call_function_single:
- * @cpu: the CPU where func should run
- * @func: the function to run; this must be fast and non-blocking
- * @info: an arbitrary pointer to pass to the function
- * @wait: if true, wait (atomically) until function has completed on other CPUs
- *
- * Run a function on one processor.
- *
- * You must not call this function with disabled interrupts, from a
- * hardware interrupt handler or from a bottom half.
- */
- int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int wait)
- {
- spin_lock(&call_lock);
- __smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
- spin_unlock(&call_lock);
- return 0;
- }
- EXPORT_SYMBOL(smp_call_function_single);
-
- /**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
- int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
- int wait)
- {
- spin_lock(&call_lock);
- cpu_clear(smp_processor_id(), mask);
- __smp_call_function_map(func, info, wait, mask);
- spin_unlock(&call_lock);
- return 0;
- }
- EXPORT_SYMBOL(smp_call_function_mask);
-
void smp_send_stop(void)
{
int cpu, rc;
bits = xchg(&S390_lowcore.ext_call_fast, 0);
if (test_bit(ec_call_function, &bits))
- do_call_function();
+ generic_smp_call_function_interrupt();
+
+ if (test_bit(ec_call_function_single, &bits))
+ generic_smp_call_function_single_interrupt();
}
/*
udelay(10);
}
+ void arch_send_call_function_ipi(cpumask_t mask)
+ {
+ int cpu;
+
+ for_each_cpu_mask(cpu, mask)
+ smp_ext_bitcall(cpu, ec_call_function);
+ }
+
+ void arch_send_call_function_single_ipi(int cpu)
+ {
+ smp_ext_bitcall(cpu, ec_call_function_single);
+ }
+
#ifndef CONFIG_64BIT
/*
* this function sends a 'purge tlb' signal to another CPU.
if (ipl_info.type != IPL_TYPE_FCP_DUMP)
return;
if (cpu >= NR_CPUS) {
- printk(KERN_WARNING "Registers for cpu %i not saved since dump "
- "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
+ pr_warning("CPU %i exceeds the maximum %i and is excluded from "
+ "the dump\n", cpu, NR_CPUS - 1);
return;
}
zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
}
out:
kfree(info);
- printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
+ pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
get_online_cpus();
__smp_rescan_cpus();
put_online_cpus();
preempt_disable();
/* Enable TOD clock interrupts on the secondary cpu. */
init_cpu_timer();
- #ifdef CONFIG_VIRT_TIMER
/* Enable cpu timer interrupts on the secondary cpu. */
init_cpu_vtimer();
- #endif
/* Enable pfault pseudo page faults on this cpu. */
pfault_init();
/* call cpu notifiers */
notify_cpu_starting(smp_processor_id());
/* Mark this cpu as online */
- spin_lock(&call_lock);
+ ipi_call_lock();
cpu_set(smp_processor_id(), cpu_online_map);
- spin_unlock(&call_lock);
+ ipi_call_unlock();
/* Switch on interrupts */
local_irq_enable();
/* Print info about this processor */
save_area = get_zeroed_page(GFP_KERNEL);
if (!save_area)
- goto out_save_area;
+ goto out;
lowcore->extended_save_area_addr = (u32) save_area;
}
#endif
lowcore_ptr[cpu] = lowcore;
return 0;
- #ifndef CONFIG_64BIT
- out_save_area:
- free_page(panic_stack);
- #endif
out:
+ free_page(panic_stack);
free_pages(async_stack, ASYNC_ORDER);
free_pages((unsigned long) lowcore, lc_order);
return -ENOMEM;
ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
cpu, sigp_set_prefix);
- if (ccode) {
- printk("sigp_set_prefix failed for cpu %d "
- "with condition code %d\n",
- (int) cpu, (int) ccode);
+ if (ccode)
return -EIO;
- }
idle = current_set[cpu];
cpu_lowcore = lowcore_ptr[cpu];
while (!smp_cpu_not_running(cpu))
cpu_relax();
smp_free_lowcore(cpu);
- printk(KERN_INFO "Processor %d spun down\n", cpu);
+ pr_info("Processor %d stopped\n", cpu);
}
void cpu_die(void)
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*/
+ #define KMSG_COMPONENT "time"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
+ #include <linux/cpu.h>
+ #include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/sysdev.h>
#include <linux/delay.h>
#include <asm/delay.h>
#include <asm/s390_ext.h>
#include <asm/div64.h>
+ #include <asm/vdso.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/timer.h>
cd->min_delta_ns = 1;
cd->max_delta_ns = LONG_MAX;
cd->rating = 400;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = s390_next_event;
cd->set_mode = s390_set_mode;
};
+ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ {
+ if (clock != &clocksource_tod)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = clock->cycle_last;
+ vdso_data->xtime_clock_sec = xtime.tv_sec;
+ vdso_data->xtime_clock_nsec = xtime.tv_nsec;
+ vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+ }
+
+ extern struct timezone sys_tz;
+
+ void update_vsyscall_tz(void)
+ {
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+ }
+
/*
* Initialize the TOD clock and the CPU timer of
* the boot cpu.
/* Enable TOD clock interrupts on the boot cpu. */
init_cpu_timer();
-
- #ifdef CONFIG_VIRT_TIMER
+ /* Enable cpu timer interrupts on the boot cpu. */
vtime_init();
- #endif
}
/*
}
sched_clock_base_cc += delta;
if (adjust.offset != 0) {
- printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
- adjust.offset);
+ pr_notice("The ETR interface has adjusted the clock "
+ "by %li microseconds\n", adjust.offset);
adjust.modes = ADJ_OFFSET_SINGLESHOT;
do_adjtimex(&adjust);
}
atomic_set_mask(0x80000000, sw_ptr);
}
+ /* Single threaded workqueue used for etr and stp sync events */
+ static struct workqueue_struct *time_sync_wq;
+
+ static void __init time_init_wq(void)
+ {
+ if (!time_sync_wq)
+ time_sync_wq = create_singlethread_workqueue("timesync");
+ }
+
/*
* External Time Reference (ETR) code.
*/
static void etr_timeout(unsigned long dummy);
static void etr_work_fn(struct work_struct *work);
+ static DEFINE_MUTEX(etr_work_mutex);
static DECLARE_WORK(etr_work, etr_work_fn);
/*
etr_tolec = get_clock();
set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
} else if (etr_port0_online || etr_port1_online) {
- printk(KERN_WARNING "Running on non ETR capable "
- "machine, only local mode available.\n");
+ pr_warning("The real or virtual hardware system does "
+ "not provide an ETR interface\n");
etr_port0_online = etr_port1_online = 0;
}
}
if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
return 0;
+ time_init_wq();
/* Check if this machine has the steai instruction. */
if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
etr_steai_available = 1;
setup_timer(&etr_timer, etr_timeout, 0UL);
if (etr_port0_online) {
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
if (etr_port1_online) {
set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
return 0;
}
if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
disable_sync_clock(NULL);
set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
/*
if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
disable_sync_clock(NULL);
set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
/*
* Both ports are not up-to-date now.
*/
set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
static void etr_timeout(unsigned long dummy)
{
set_bit(ETR_EVENT_UPDATE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
/*
}
struct clock_sync_data {
+ atomic_t cpus;
int in_sync;
unsigned long long fixup_cc;
+ int etr_port;
+ struct etr_aib *etr_aib;
};
- static void clock_sync_cpu_start(void *dummy)
+ static void clock_sync_cpu(struct clock_sync_data *sync)
{
- struct clock_sync_data *sync = dummy;
-
+ atomic_dec(&sync->cpus);
enable_sync_clock();
/*
* This looks like a busy wait loop but it isn't. etr_sync_cpus
fixup_clock_comparator(sync->fixup_cc);
}
- static void clock_sync_cpu_end(void *dummy)
- {
- }
-
/*
* Sync the TOD clock using the port refered to by aibp. This port
* has to be enabled and the other port has to be disabled. The
* last eacr update has to be more than 1.6 seconds in the past.
*/
- static int etr_sync_clock(struct etr_aib *aib, int port)
+ static int etr_sync_clock(void *data)
{
- struct etr_aib *sync_port;
- struct clock_sync_data etr_sync;
+ static int first;
unsigned long long clock, old_clock, delay, delta;
- int follows;
+ struct clock_sync_data *etr_sync;
+ struct etr_aib *sync_port, *aib;
+ int port;
int rc;
- /* Check if the current aib is adjacent to the sync port aib. */
- sync_port = (port == 0) ? &etr_port0 : &etr_port1;
- follows = etr_aib_follows(sync_port, aib, port);
- memcpy(sync_port, aib, sizeof(*aib));
- if (!follows)
- return -EAGAIN;
+ etr_sync = data;
- /*
- * Catch all other cpus and make them wait until we have
- * successfully synced the clock. smp_call_function will
- * return after all other cpus are in etr_sync_cpu_start.
- */
- memset(&etr_sync, 0, sizeof(etr_sync));
- preempt_disable();
- smp_call_function(clock_sync_cpu_start, &etr_sync, 0);
- local_irq_disable();
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(etr_sync);
+ return 0;
+ }
+
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&etr_sync->cpus) != 0)
+ cpu_relax();
+
+ port = etr_sync->etr_port;
+ aib = etr_sync->etr_aib;
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
enable_sync_clock();
/* Set clock to next OTE. */
delay = (unsigned long long)
(aib->edf2.etv - sync_port->edf2.etv) << 32;
delta = adjust_time(old_clock, clock, delay);
- etr_sync.fixup_cc = delta;
+ etr_sync->fixup_cc = delta;
fixup_clock_comparator(delta);
/* Verify that the clock is properly set. */
if (!etr_aib_follows(sync_port, aib, port)) {
/* Didn't work. */
disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ etr_sync->in_sync = -EAGAIN;
rc = -EAGAIN;
} else {
- etr_sync.in_sync = 1;
+ etr_sync->in_sync = 1;
rc = 0;
}
} else {
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ etr_sync->in_sync = -EAGAIN;
rc = -EAGAIN;
}
- local_irq_enable();
- smp_call_function(clock_sync_cpu_end, NULL, 0);
- preempt_enable();
+ xchg(&first, 0);
+ return rc;
+ }
+
+ static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+ {
+ struct clock_sync_data etr_sync;
+ struct etr_aib *sync_port;
+ int follows;
+ int rc;
+
+ /* Check if the current aib is adjacent to the sync port aib. */
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ follows = etr_aib_follows(sync_port, aib, port);
+ memcpy(sync_port, aib, sizeof(*aib));
+ if (!follows)
+ return -EAGAIN;
+ memset(&etr_sync, 0, sizeof(etr_sync));
+ etr_sync.etr_aib = aib;
+ etr_sync.etr_port = port;
+ get_online_cpus();
+ atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
+ rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map);
+ put_online_cpus();
return rc;
}
}
/*
- * ETR tasklet. In this function you'll find the main logic. In
+ * ETR work. In this function you'll find the main logic. In
* particular this is the only function that calls etr_update_eacr(),
* it "controls" the etr control register.
*/
struct etr_aib aib;
int sync_port;
+ /* prevent multiple execution. */
+ mutex_lock(&etr_work_mutex);
+
/* Create working copy of etr_eacr. */
eacr = etr_eacr;
del_timer_sync(&etr_timer);
etr_update_eacr(eacr);
clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- return;
+ goto out_unlock;
}
/* Store aib to get the current ETR status word. */
eacr.es || sync_port < 0) {
etr_update_eacr(eacr);
etr_set_tolec_timeout(now);
- return;
+ goto out_unlock;
}
/*
etr_update_eacr(eacr);
set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
if (now < etr_tolec + (1600000 << 12) ||
- etr_sync_clock(&aib, sync_port) != 0) {
+ etr_sync_clock_stop(&aib, sync_port) != 0) {
/* Sync failed. Try again in 1/2 second. */
eacr.es = 0;
etr_update_eacr(eacr);
etr_set_sync_timeout();
} else
etr_set_tolec_timeout(now);
+ out_unlock:
+ mutex_unlock(&etr_work_mutex);
}
/*
return count; /* Nothing to do. */
etr_port0_online = value;
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
} else {
if (etr_port1_online == value)
return count; /* Nothing to do. */
etr_port1_online = value;
set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
return count;
}
static void *stp_page;
static void stp_work_fn(struct work_struct *work);
+ static DEFINE_MUTEX(stp_work_mutex);
static DECLARE_WORK(stp_work, stp_work_fn);
static int __init early_parse_stp(char *p)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- printk(KERN_WARNING "Running on non STP capable machine.\n");
+ pr_warning("The real or virtual hardware system does "
+ "not provide an STP interface\n");
free_bootmem((unsigned long) stp_page, PAGE_SIZE);
stp_page = NULL;
stp_online = 0;
static int __init stp_init(void)
{
- if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
- schedule_work(&stp_work);
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return 0;
+ time_init_wq();
+ if (!stp_online)
+ return 0;
+ queue_work(time_sync_wq, &stp_work);
return 0;
}
static void stp_timing_alert(struct stp_irq_parm *intparm)
{
if (intparm->tsc || intparm->lac || intparm->tcpc)
- schedule_work(&stp_work);
+ queue_work(time_sync_wq, &stp_work);
}
/*
if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
return;
disable_sync_clock(NULL);
- schedule_work(&stp_work);
+ queue_work(time_sync_wq, &stp_work);
}
/*
if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
return;
disable_sync_clock(NULL);
- schedule_work(&stp_work);
+ queue_work(time_sync_wq, &stp_work);
}
- /*
- * STP tasklet. Check for the STP state and take over the clock
- * synchronization if the STP clock source is usable.
- */
- static void stp_work_fn(struct work_struct *work)
+
+ static int stp_sync_clock(void *data)
{
- struct clock_sync_data stp_sync;
+ static int first;
unsigned long long old_clock, delta;
+ struct clock_sync_data *stp_sync;
int rc;
- if (!stp_online) {
- chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
- return;
- }
+ stp_sync = data;
- rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
- if (rc)
- return;
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(stp_sync);
+ return 0;
+ }
- rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
- if (rc || stp_info.c == 0)
- return;
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&stp_sync->cpus) != 0)
+ cpu_relax();
- /*
- * Catch all other cpus and make them wait until we have
- * successfully synced the clock. smp_call_function will
- * return after all other cpus are in clock_sync_cpu_start.
- */
- memset(&stp_sync, 0, sizeof(stp_sync));
- preempt_disable();
- smp_call_function(clock_sync_cpu_start, &stp_sync, 0);
- local_irq_disable();
enable_sync_clock();
set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
rc = 0;
if (stp_info.todoff[0] || stp_info.todoff[1] ||
}
if (rc) {
disable_sync_clock(NULL);
- stp_sync.in_sync = -EAGAIN;
+ stp_sync->in_sync = -EAGAIN;
clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
if (etr_port0_online || etr_port1_online)
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
} else
- stp_sync.in_sync = 1;
+ stp_sync->in_sync = 1;
+ xchg(&first, 0);
+ return 0;
+ }
+
+ /*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+ static void stp_work_fn(struct work_struct *work)
+ {
+ struct clock_sync_data stp_sync;
+ int rc;
+
+ /* prevent multiple execution. */
+ mutex_lock(&stp_work_mutex);
+
+ if (!stp_online) {
+ chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ goto out_unlock;
+ }
+
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ if (rc)
+ goto out_unlock;
+
+ rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ if (rc || stp_info.c == 0)
+ goto out_unlock;
+
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ get_online_cpus();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map);
+ put_online_cpus();
- local_irq_enable();
- smp_call_function(clock_sync_cpu_end, NULL, 0);
- preempt_enable();
+ out_unlock:
+ mutex_unlock(&stp_work_mutex);
}
/*
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
stp_online = value;
- schedule_work(&stp_work);
+ queue_work(time_sync_wq, &stp_work);
return count;
}
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
+ #define KMSG_COMPONENT "cpu"
+ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/workqueue.h>
#include <linux/cpu.h>
#include <linux/smp.h>
+ #include <linux/cpuset.h>
#include <asm/delay.h>
#include <asm/s390_ext.h>
#include <asm/sysinfo.h>
cpumask_t mask;
};
+ static int topology_enabled;
static void topology_work_fn(struct work_struct *work);
static struct tl_info *tl_info;
static struct core_info core_info;
static int machine_has_topology;
- static int machine_has_topology_irq;
static struct timer_list topology_timer;
static void set_topology_timer(void);
static DECLARE_WORK(topology_work, topology_work_fn);
cpumask_t mask;
cpus_clear(mask);
- if (!machine_has_topology)
- return cpu_present_map;
+ if (!topology_enabled || !machine_has_topology)
+ return cpu_possible_map;
spin_lock_irqsave(&topology_lock, flags);
while (core) {
if (cpu_isset(cpu, core->mask)) {
return mask;
}
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+ return &cpu_core_map[cpu];
+}
+
static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
{
unsigned int cpu;
int cpu;
mutex_lock(&smp_cpu_state_mutex);
- for_each_present_cpu(cpu)
+ for_each_possible_cpu(cpu)
smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
mutex_unlock(&smp_cpu_state_mutex);
}
rc = ptf(PTF_HORIZONTAL);
if (rc)
return -EBUSY;
- for_each_present_cpu(cpu)
+ for_each_possible_cpu(cpu)
smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
return rc;
}
{
int cpu;
- for_each_present_cpu(cpu)
+ for_each_possible_cpu(cpu)
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
}
- void arch_update_cpu_topology(void)
+ int arch_update_cpu_topology(void)
{
struct tl_info *info = tl_info;
struct sys_device *sysdev;
if (!machine_has_topology) {
update_cpu_core_map();
topology_update_polarization_simple();
- return;
+ return 0;
}
stsi(info, 15, 1, 2);
tl_to_cores(info);
sysdev = get_cpu_sysdev(cpu);
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
}
+ return 1;
}
static void topology_work_fn(struct work_struct *work)
{
- arch_reinit_sched_domains();
+ rebuild_sched_domains();
}
void topology_schedule_update(void)
add_timer(&topology_timer);
}
- static void topology_interrupt(__u16 code)
+ static int __init early_parse_topology(char *p)
{
- schedule_work(&topology_work);
+ if (strncmp(p, "on", 2))
+ return 0;
+ topology_enabled = 1;
+ return 0;
}
+ early_param("topology", early_parse_topology);
static int __init init_topology_update(void)
{
goto out;
}
init_timer_deferrable(&topology_timer);
- if (machine_has_topology_irq) {
- rc = register_external_interrupt(0x2005, topology_interrupt);
- if (rc)
- goto out;
- ctl_set_bit(0, 8);
- }
- else
- set_topology_timer();
+ set_topology_timer();
out:
update_cpu_core_map();
return rc;
return;
machine_has_topology = 1;
- if (facility_bits & (1ULL << 51))
- machine_has_topology_irq = 1;
-
tl_info = alloc_bootmem_pages(PAGE_SIZE);
info = tl_info;
stsi(info, 15, 1, 2);
for (i = 0; i < info->mnest - 2; i++)
nr_cores *= info->mag[NR_MAG - 3 - i];
- printk(KERN_INFO "CPU topology:");
+ pr_info("The CPU configuration topology of the machine is:");
for (i = 0; i < NR_MAG; i++)
printk(" %d", info->mag[i]);
printk(" / %d\n", info->mnest);
return;
error:
machine_has_topology = 0;
- machine_has_topology_irq = 0;
}
};
extern int pci_routeirq;
+ extern int noioapicquirk;
+ extern int noioapicreroute;
/* scan a bus after allocating a pci_sysdata for it */
extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
-static inline int __pcibus_to_node(struct pci_bus *bus)
+static inline int __pcibus_to_node(const struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
+ const struct pci_sysdata *sd = bus->sysdata;
return sd->node;
}
{
return node_to_cpumask(__pcibus_to_node(bus));
}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+ return cpumask_of_node(__pcibus_to_node(bus));
+}
#endif
#endif /* _ASM_X86_PCI_H */
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/dmar.h>
+ #include <linux/ftrace.h>
#include <asm/atomic.h>
#include <asm/smp.h>
struct clock_event_device *evt);
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
static void apic_pm_activate(void);
/*
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
+ apic_write(APIC_TMICT, 0xffffffff);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
/*
* Local APIC timer broadcast function
*/
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+ send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
#endif
}
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
memcpy(levt, &lapic_clockevent, sizeof(*levt));
- levt->cpumask = cpumask_of_cpu(smp_processor_id());
+ levt->cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(levt);
}
} else {
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- printk(KERN_WARNING "APIC calibration not consistent "
+ pr_warning("APIC calibration not consistent "
"with PM Timer: %ldms instead of 100ms\n",
(long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
do_div(res, deltapm);
- printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+ pr_info("APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long)res, *delta);
*delta = (long)res;
}
*/
if (calibration_result < (1000000 / HZ)) {
local_irq_enable();
- printk(KERN_WARNING
- "APIC frequency too slow, disabling apic timer\n");
+ pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
}
while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
cpu_relax();
/* Stop the lapic timer */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
- local_irq_enable();
-
/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- printk(KERN_WARNING
- "APIC timer disabled due to verification failure.\n");
+ pr_warning("APIC timer disabled due to verification failure.\n");
return -1;
}
* broadcast mechanism is used. On UP systems simply ignore it.
*/
if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
+ pr_info("Disabling APIC timer\n");
/* No broadcast on UP ! */
if (num_possible_cpus() > 1) {
lapic_clockevent.mult = 1;
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
else
- printk(KERN_WARNING "APIC timer registered as dummy,"
+ pr_warning("APIC timer registered as dummy,"
" due to nmi_watchdog=%d!\n", nmi_watchdog);
/* Setup the lapic or request the broadcast */
* spurious.
*/
if (!evt->event_handler) {
- printk(KERN_WARNING
- "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
/* Switch it off */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
return;
/*
* the NMI deadlock-detector uses this.
*/
- #ifdef CONFIG_X86_64
- add_pda(apic_timer_irqs, 1);
- #else
- per_cpu(irq_stat, cpu).apic_timer_irqs++;
- #endif
+ inc_irq_stat(apic_timer_irqs);
evt->event_handler(evt);
}
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
- void smp_apic_timer_interrupt(struct pt_regs *regs)
+ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- #ifdef CONFIG_X86_64
exit_idle();
- #endif
irq_enter();
local_apic_timer_interrupt();
irq_exit();
unsigned int oldvalue, value, maxlvt;
if (!lapic_is_integrated()) {
- printk(KERN_INFO "No ESR for 82489DX.\n");
+ pr_info("No ESR for 82489DX.\n");
return;
}
* ESR disabled - we can't do anything useful with the
* errors anyway - mbligh
*/
- printk(KERN_INFO "Leaving ESR disabled.\n");
+ pr_info("Leaving ESR disabled.\n");
return;
}
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (msr & X2APIC_ENABLE) {
- printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
apic_ops = &x2apic_ops;
}
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
- printk("Enabling x2apic\n");
+ pr_info("Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
return;
if (!x2apic_preenabled && disable_x2apic) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of nox2apic\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
return;
}
panic("Bios already enabled x2apic, can't enforce nox2apic");
if (!x2apic_preenabled && skip_ioapic_setup) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of skipping io-apic setup\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
return;
}
ret = dmar_table_init();
if (ret) {
- printk(KERN_INFO
- "dmar_table_init() failed with %d:\n", ret);
+ pr_info("dmar_table_init() failed with %d:\n", ret);
if (x2apic_preenabled)
panic("x2apic enabled by bios. But IR enabling failed");
else
- printk(KERN_INFO
- "Not enabling x2apic,Intr-remapping\n");
+ pr_info("Not enabling x2apic,Intr-remapping\n");
return;
}
ret = save_mask_IO_APIC_setup();
if (ret) {
- printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+ pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
}
if (!ret) {
if (!x2apic_preenabled)
- printk(KERN_INFO
- "Enabled x2apic and interrupt-remapping\n");
+ pr_info("Enabled x2apic and interrupt-remapping\n");
else
- printk(KERN_INFO
- "Enabled Interrupt-remapping\n");
+ pr_info("Enabled Interrupt-remapping\n");
} else
- printk(KERN_ERR
- "Failed to enable Interrupt-remapping and x2apic\n");
+ pr_err("Failed to enable Interrupt-remapping and x2apic\n");
#else
if (!cpu_has_x2apic)
return;
panic("x2apic enabled prior OS handover,"
" enable CONFIG_INTR_REMAP");
- printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
- " and x2apic\n");
+ pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
#endif
return;
static int __init detect_init_APIC(void)
{
if (!cpu_has_apic) {
- printk(KERN_INFO "No local APIC present\n");
+ pr_info("No local APIC present\n");
return -1;
}
* "lapic" specified.
*/
if (!force_enable_local_apic) {
- printk(KERN_INFO "Local APIC disabled by BIOS -- "
- "you can enable it with \"lapic\"\n");
+ pr_info("Local APIC disabled by BIOS -- "
+ "you can enable it with \"lapic\"\n");
return -1;
}
/*
*/
rdmsr(MSR_IA32_APICBASE, l, h);
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- printk(KERN_INFO
- "Local APIC disabled by BIOS -- reenabling.\n");
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
wrmsr(MSR_IA32_APICBASE, l, h);
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- printk(KERN_WARNING "Could not enable APIC!\n");
+ pr_warning("Could not enable APIC!\n");
return -1;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
if (l & MSR_IA32_APICBASE_ENABLE)
mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
- printk(KERN_INFO "Found and enabled local APIC!\n");
+ pr_info("Found and enabled local APIC!\n");
apic_pm_activate();
return 0;
no_apic:
- printk(KERN_INFO "No local APIC present or hardware disabled\n");
+ pr_info("No local APIC present or hardware disabled\n");
return -1;
}
#endif
{
#ifdef CONFIG_X86_64
if (disable_apic) {
- printk(KERN_INFO "Apic disabled\n");
+ pr_info("Apic disabled\n");
return -1;
}
if (!cpu_has_apic) {
disable_apic = 1;
- printk(KERN_INFO "Apic disabled by BIOS\n");
+ pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
*/
if (!cpu_has_apic &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
- boot_cpu_physical_apicid);
+ pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+ boot_cpu_physical_apicid);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
{
u32 v;
- #ifdef CONFIG_X86_64
exit_idle();
- #endif
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
- #ifdef CONFIG_X86_64
- add_pda(irq_spurious_count, 1);
- #else
+ inc_irq_stat(irq_spurious_count);
+
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
- "should never happen.\n", smp_processor_id());
- __get_cpu_var(irq_stat).irq_spurious_count++;
- #endif
+ pr_info("spurious APIC interrupt on CPU#%d, "
+ "should never happen.\n", smp_processor_id());
irq_exit();
}
{
u32 v, v1;
- #ifdef CONFIG_X86_64
exit_idle();
- #endif
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
ack_APIC_irq();
atomic_inc(&irq_err_count);
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+ /*
+ * Here is what the APIC error bits mean:
+ * 0: Send CS error
+ * 1: Receive CS error
+ * 2: Send accept error
+ * 3: Receive accept error
+ * 4: Reserved
+ * 5: Send illegal vector
+ * 6: Received illegal vector
+ * 7: Illegal register address
+ */
+ pr_debug("APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
* Validate version
*/
if (version == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
+ pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ version);
version = 0x10;
}
apic_version[apicid] = version;
if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ pr_warning("WARNING: NR_CPUS limit of %i reached."
" Processor ignored.\n", NR_CPUS);
return;
}
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+ pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
cpumask_t *mask = &this_leaf->shared_cpu_map;
n = type?
- cpulist_scnprintf(buf, len-2, *mask):
- cpumask_scnprintf(buf, len-2, *mask);
+ cpulist_scnprintf(buf, len-2, mask) :
+ cpumask_scnprintf(buf, len-2, mask);
buf[n++] = '\n';
buf[n] = '\0';
}
return show_shared_cpu_map_func(leaf, 1, buf);
}
- static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
- switch(this_leaf->eax.split.type) {
- case CACHE_TYPE_DATA:
+ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+ {
+ switch (this_leaf->eax.split.type) {
+ case CACHE_TYPE_DATA:
return sprintf(buf, "Data\n");
- break;
- case CACHE_TYPE_INST:
+ case CACHE_TYPE_INST:
return sprintf(buf, "Instruction\n");
- break;
- case CACHE_TYPE_UNIFIED:
+ case CACHE_TYPE_UNIFIED:
return sprintf(buf, "Unified\n");
- break;
- default:
+ default:
return sprintf(buf, "Unknown\n");
- break;
}
}
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
unsigned long hpet_address;
- unsigned long hpet_num_timers;
+ #ifdef CONFIG_PCI_MSI
+ static unsigned long hpet_num_timers;
+ #endif
static void __iomem *hpet_virt_address;
struct hpet_dev {
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(&hpet_clockevent);
global_clock_event = &hpet_clockevent;
printk(KERN_DEBUG "hpet clockevent registered\n");
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_setup_msi_irq(hdev->irq);
disable_irq(hdev->irq);
- irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+ irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
break;
return -1;
disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+ irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
enable_irq(dev->irq);
printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
/* 5 usec minimum reprogramming delta. */
evt->min_delta_ns = 5000;
- evt->cpumask = cpumask_of_cpu(hdev->cpu);
+ evt->cpumask = cpumask_of(hdev->cpu);
clockevents_register_device(evt);
}
static int assign_irq_vector(int irq, cpumask_t mask);
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq(unsigned int irq,
+ const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned long flags;
cpumask_t tmp;
struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
cfg = irq_cfg(irq);
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
/*
* Only the high 8 bits are valid.
desc = irq_to_desc(irq);
spin_lock_irqsave(&ioapic_lock, flags);
__target_IO_APIC_irq(irq, dest, cfg->vector);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
#endif /* CONFIG_SMP */
continue;
}
- desc->chip->set_affinity(irq, desc->pending_mask);
+ desc->chip->set_affinity(irq, &desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+ const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = mask;
+ cpumask_copy(&desc->pending_mask, mask);
migrate_irq_remapped_level(irq);
return;
}
- migrate_ioapic_irq(irq, mask);
+ migrate_ioapic_irq(irq, *mask);
}
#endif
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
+
ack_APIC_irq();
- #ifdef CONFIG_X86_64
exit_idle();
- #endif
irq_enter();
me = smp_processor_id();
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct msi_msg msg;
cpumask_t tmp;
struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
read_msi_msg(irq, &msg);
write_msi_msg(irq, &msg);
desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
#ifdef CONFIG_INTR_REMAP
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+ const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned int dest;
struct irte irte;
struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
irte.vector = cfg->vector;
}
desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
#endif
#endif /* CONFIG_SMP */
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct msi_msg msg;
cpumask_t tmp;
struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
dmar_msi_read(irq, &msg);
dmar_msi_write(irq, &msg);
desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irq_desc *desc;
unsigned int dest;
cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
hpet_msi_read(irq, &msg);
hpet_msi_write(irq, &msg);
desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
#endif /* CONFIG_SMP */
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp;
struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, *mask))
return;
cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
target_ht_irq(irq, dest, cfg->vector);
desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
#endif
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, mask);
+ set_ir_ioapic_affinity_irq(irq, &mask);
else
#endif
- set_ioapic_affinity_irq(irq, mask);
+ set_ioapic_affinity_irq(irq, &mask);
}
}
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/delay.h>
+ #include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/io_apic.h>
#include <asm/idle.h>
#include <asm/smp.h>
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
*
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
u64 curbase = (u64)task_stack_page(current);
- static unsigned long warned = -60*HZ;
-
- if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
- regs->sp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- current->comm, curbase, regs->sp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
- }
+
+ WARN_ONCE(regs->sp >= curbase &&
+ regs->sp <= curbase + THREAD_SIZE &&
+ regs->sp < curbase + sizeof(struct thread_info) +
+ sizeof(struct pt_regs) + 128,
+
+ "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ current->comm, curbase, regs->sp);
#endif
+ }
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
- asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc *desc;
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
- #endif
desc = irq_to_desc(irq);
if (likely(desc))
desc->chip->mask(irq);
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, &mask);
else if (!(warned++))
set_affinity = 0;
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
+ #include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
/*
* Activate a secondary processor.
*/
- static void __cpuinit start_secondary(void *unused)
+ notrace static void __cpuinit start_secondary(void *unused)
{
/*
* Don't put *anything* before cpu_init(), SMP booting is too
* fragile that we want to limit the things done here to the
* most necessary things.
*/
- #ifdef CONFIG_VMI
vmi_bringup();
- #endif
cpu_init();
preempt_disable();
smp_callin();
}
/* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
/*
* And for power savings, we return cpu_core_map
*/
if (sched_mc_power_savings || sched_smt_power_savings)
- return per_cpu(cpu_core_map, cpu);
+ return &per_cpu(cpu_core_map, cpu);
else
- return c->llc_shared_map;
+ return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+ return *cpu_coregroup_mask(cpu);
}
static void impress_friends(void)
pr_debug("Before bogocount - setting activated=1.\n");
}
- static inline void __inquire_remote_apic(int apicid)
+ void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
}
}
- #ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
- static int __devinit
- wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
return (send_status | accept_status);
}
- #endif /* WAKE_SECONDARY_VIA_NMI */
- #ifdef WAKE_SECONDARY_VIA_INIT
- static int __devinit
- wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
return (send_status | accept_status);
}
- #endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;
#endif
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
- "by the BIOS.\n", hard_smp_processor_id());
+ printk(KERN_WARNING
+ "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ hard_smp_processor_id());
+
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector, interrupt[vector]);
+ set_intr_gate(vector,
+ interrupt[vector-FIRST_EXTERNAL_VECTOR]);
set_irq_chip_and_handler_name(i, &lguest_irq_controller,
handle_level_irq,
"level");
/* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */
- lguest_clockevent.cpumask = cpumask_of_cpu(0);
+ lguest_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&lguest_clockevent);
/* Finally, we unblock the timer interrupt. */
u16 priv;
};
+/* total number of cpus in this system (may exceed NR_CPUS) */
+extern unsigned int total_cpus;
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
})
#define smp_call_function_mask(mask, func, info, wait) \
(up_smp_call_function(func, info))
+ #define smp_call_function_many(mask, func, info, wait) \
+ (up_smp_call_function(func, info))
static inline void init_call_single_data(void)
{
}
Say N.
+ config KALLSYMS_STRIP_GENERATED
+ bool "Strip machine generated symbols from kallsyms"
+ depends on KALLSYMS_ALL
+ default y
+ help
+ Say N if you want kallsyms to retain even machine generated symbols.
+
config KALLSYMS_EXTRA_PASS
bool "Do an extra kallsyms pass"
depends on KALLSYMS
config MARKERS
bool "Activate markers"
+ depends on TRACEPOINTS
help
Place an empty function call at each marker site. Can be
dynamically changed for a probe function.
endif # MODULES
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_map and
+ cpu_possible_map, some of them chose to initialize cpu_possible_map
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers persuing me down dark alleys.
+
config STOP_MACHINE
bool
default y
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+ int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
unsigned long full_count = count, err;
cpumask_t new_value;
- err = cpumask_parse_user(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, &new_value);
if (err)
return err;
};
#ifdef CONFIG_SMP
- static inline void profile_nop(void *unused)
+ static void profile_nop(void *unused)
{
}
*/
#define RUNTIME_INF ((u64)~0ULL)
+ DEFINE_TRACE(sched_wait_task);
+ DEFINE_TRACE(sched_wakeup);
+ DEFINE_TRACE(sched_wakeup_new);
+ DEFINE_TRACE(sched_switch);
+ DEFINE_TRACE(sched_migrate_task);
+
#ifdef CONFIG_SMP
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
struct cgroup_subsys_state css;
#endif
+ #ifdef CONFIG_USER_SCHED
+ uid_t uid;
+ #endif
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
struct sched_entity **se;
#ifdef CONFIG_USER_SCHED
+ /* Helper function to pass uid information to create_sched_user() */
+ void set_tg_uid(struct user_struct *user)
+ {
+ user->tg->uid = user->uid;
+ }
+
/*
* Root task group.
* Every UID task group (including init_task_group aka UID-0) will
struct task_group *tg;
#ifdef CONFIG_USER_SCHED
- tg = p->user->tg;
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
#elif defined(CONFIG_CGROUP_SCHED)
tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
struct task_group, css);
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
+ unsigned long long rq_cpu_time;
+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
/* sys_sched_yield() stats */
unsigned int yld_exp_empty;
#undef SCHED_FEAT
- static int sched_feat_open(struct inode *inode, struct file *filp)
- {
- filp->private_data = inode->i_private;
- return 0;
- }
-
- static ssize_t
- sched_feat_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+ static int sched_feat_show(struct seq_file *m, void *v)
{
int i;
for (i = 0; sched_feat_names[i]; i++) {
- len += strlen(sched_feat_names[i]);
- len += 4;
- }
-
- buf = kmalloc(len + 2, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- for (i = 0; sched_feat_names[i]; i++) {
- if (sysctl_sched_features & (1UL << i))
- r += sprintf(buf + r, "%s ", sched_feat_names[i]);
- else
- r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+ if (!(sysctl_sched_features & (1UL << i)))
+ seq_puts(m, "NO_");
+ seq_printf(m, "%s ", sched_feat_names[i]);
}
+ seq_puts(m, "\n");
- r += sprintf(buf + r, "\n");
- WARN_ON(r >= len + 2);
-
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
- kfree(buf);
-
- return r;
+ return 0;
}
static ssize_t
return cnt;
}
+ static int sched_feat_open(struct inode *inode, struct file *filp)
+ {
+ return single_open(filp, sched_feat_show, NULL);
+ }
+
static struct file_operations sched_feat_fops = {
- .open = sched_feat_open,
- .read = sched_feat_read,
- .write = sched_feat_write,
+ .open = sched_feat_open,
+ .write = sched_feat_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
static __init int sched_init_debug(void)
update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long sd_shares, unsigned long sd_rq_weight)
{
- int boost = 0;
unsigned long shares;
unsigned long rq_weight;
if (!tg->se[cpu])
return;
- rq_weight = tg->cfs_rq[cpu]->load.weight;
-
- /*
- * If there are currently no tasks on the cpu pretend there is one of
- * average load so that when a new task gets to run here it will not
- * get delayed by group starvation.
- */
- if (!rq_weight) {
- boost = 1;
- rq_weight = NICE_0_LOAD;
- }
-
- if (unlikely(rq_weight > sd_rq_weight))
- rq_weight = sd_rq_weight;
+ rq_weight = tg->cfs_rq[cpu]->rq_weight;
/*
* \Sum shares * rq_weight
* \Sum rq_weight
*
*/
- shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+ shares = (sd_shares * rq_weight) / sd_rq_weight;
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
if (abs(shares - tg->se[cpu]->load.weight) >
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
- /*
- * record the actual number of shares, not the boosted amount.
- */
- tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
- tg->cfs_rq[cpu]->rq_weight = rq_weight;
+ tg->cfs_rq[cpu]->shares = shares;
__set_se_shares(tg->se[cpu], shares);
spin_unlock_irqrestore(&rq->lock, flags);
*/
static int tg_shares_up(struct task_group *tg, void *data)
{
- unsigned long rq_weight = 0;
+ unsigned long weight, rq_weight = 0;
unsigned long shares = 0;
struct sched_domain *sd = data;
int i;
for_each_cpu_mask(i, sd->span) {
- rq_weight += tg->cfs_rq[i]->load.weight;
+ /*
+ * If there are currently no tasks on the cpu pretend there
+ * is one of average load so that when a new task gets to
+ * run here it will not get delayed by group starvation.
+ */
+ weight = tg->cfs_rq[i]->load.weight;
+ if (!weight)
+ weight = NICE_0_LOAD;
+
+ tg->cfs_rq[i]->rq_weight = weight;
+ rq_weight += weight;
shares += tg->cfs_rq[i]->shares;
}
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
shares = tg->shares;
- if (!rq_weight)
- rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
for_each_cpu_mask(i, sd->span)
update_group_shares_cpu(tg, i, shares, rq_weight);
#endif
+ /*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(this_rq->lock)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+ {
+ int ret = 0;
+
+ if (unlikely(!irqs_disabled())) {
+ /* printk() doesn't work good under rq->lock */
+ spin_unlock(&this_rq->lock);
+ BUG_ON(1);
+ }
+ if (unlikely(!spin_trylock(&busiest->lock))) {
+ if (busiest < this_rq) {
+ spin_unlock(&this_rq->lock);
+ spin_lock(&busiest->lock);
+ spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+ ret = 1;
+ } else
+ spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+ }
+ return ret;
+ }
+
+ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(busiest->lock)
+ {
+ spin_unlock(&busiest->lock);
+ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+ }
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
clock_offset = old_rq->clock - new_rq->clock;
+ trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+
#ifdef CONFIG_SCHEDSTATS
if (p->se.wait_start)
p->se.wait_start -= clock_offset;
smp_wmb();
rq = task_rq_lock(p, &flags);
+ update_rq_clock(rq);
old_state = p->state;
if (!(old_state & state))
goto out;
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
- update_rq_clock(rq);
activate_task(rq, p, 1);
success = 1;
out_running:
- trace_sched_wakeup(rq, p);
+ trace_sched_wakeup(rq, p, success);
check_preempt_curr(rq, p, sync);
p->state = TASK_RUNNING;
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_sched_wakeup_new(rq, p);
+ trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, 0);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
}
/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
- static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(this_rq->lock)
- __acquires(busiest->lock)
- __acquires(this_rq->lock)
- {
- int ret = 0;
-
- if (unlikely(!irqs_disabled())) {
- /* printk() doesn't work good under rq->lock */
- spin_unlock(&this_rq->lock);
- BUG_ON(1);
- }
- if (unlikely(!spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
- spin_unlock(&this_rq->lock);
- spin_lock(&busiest->lock);
- spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
- ret = 1;
- } else
- spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
- }
- return ret;
- }
-
- static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(busiest->lock)
- {
- spin_unlock(&busiest->lock);
- lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
- }
-
- /*
* If dest_cpu is allowed for this process, migrate the task to it.
* This is accomplished by forcing the cpu_allowed mask to only
* allow dest_cpu, which will force the cpu onto dest_cpu. Then
|| unlikely(!cpu_active(dest_cpu)))
goto out;
- trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
static void idle_balance(int this_cpu, struct rq *this_rq)
{
struct sched_domain *sd;
- int pulled_task = -1;
+ int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
cpumask_t tmpmask;
set_load_weight(p);
}
+ /*
+ * check the target process has a UID that matches the current process's
+ */
+ static bool check_same_owner(struct task_struct *p)
+ {
+ const struct cred *cred = current_cred(), *pcred;
+ bool match;
+
+ rcu_read_lock();
+ pcred = __task_cred(p);
+ match = (cred->euid == pcred->euid ||
+ cred->euid == pcred->uid);
+ rcu_read_unlock();
+ return match;
+ }
+
static int __sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param, bool user)
{
return -EPERM;
/* can't change other user's priorities */
- if ((current->euid != p->euid) &&
- (current->euid != p->uid))
+ if (!check_same_owner(p))
return -EPERM;
}
read_unlock(&tasklist_lock);
retval = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_NICE))
+ if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p, 0, NULL);
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
+ ftrace_graph_init_task(idle);
}
/*
/*
* Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
*/
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
- static inline const char *sd_level_to_string(enum sched_domain_level lvl)
- {
- switch (lvl) {
- case SD_LV_NONE:
- return "NONE";
- case SD_LV_SIBLING:
- return "SIBLING";
- case SD_LV_MC:
- return "MC";
- case SD_LV_CPU:
- return "CPU";
- case SD_LV_NODE:
- return "NODE";
- case SD_LV_ALLNODES:
- return "ALLNODES";
- case SD_LV_MAX:
- return "MAX";
-
- }
- return "MAX";
- }
-
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_t *groupmask)
{
struct sched_group *group = sd->groups;
char str[256];
- cpulist_scnprintf(str, sizeof(str), sd->span);
+ cpulist_scnprintf(str, sizeof(str), &sd->span);
cpus_clear(*groupmask);
printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
return -1;
}
- printk(KERN_CONT "span %s level %s\n",
- str, sd_level_to_string(sd->level));
+ printk(KERN_CONT "span %s level %s\n", str, sd->name);
if (!cpu_isset(cpu, sd->span)) {
printk(KERN_ERR "ERROR: domain->span does not contain "
cpus_or(*groupmask, *groupmask, group->cpumask);
- cpulist_scnprintf(str, sizeof(str), group->cpumask);
+ cpulist_scnprintf(str, sizeof(str), &group->cpumask);
printk(KERN_CONT " %s", str);
group = group->next;
SD_BALANCE_EXEC |
SD_SHARE_CPUPOWER |
SD_SHARE_PKG_RESOURCES);
+ if (nr_node_ids == 1)
+ pflags &= ~SD_SERIALIZE;
}
if (~cflags & pflags)
return 0;
{
int group;
#ifdef CONFIG_SCHED_MC
- *mask = cpu_coregroup_map(cpu);
+ *mask = *cpu_coregroup_mask(cpu);
cpus_and(*mask, *mask, *cpu_map);
group = first_cpu(*mask);
#elif defined(CONFIG_SCHED_SMT)
};
#if NR_CPUS > 128
- #define SCHED_CPUMASK_ALLOC 1
- #define SCHED_CPUMASK_FREE(v) kfree(v)
- #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
+ #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
+ static inline void sched_cpumask_alloc(struct allmasks **masks)
+ {
+ *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+ }
+ static inline void sched_cpumask_free(struct allmasks *masks)
+ {
+ kfree(masks);
+ }
#else
- #define SCHED_CPUMASK_ALLOC 0
- #define SCHED_CPUMASK_FREE(v)
- #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
+ #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
+ static inline void sched_cpumask_alloc(struct allmasks **masks)
+ { }
+ static inline void sched_cpumask_free(struct allmasks *masks)
+ { }
#endif
#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
return -ENOMEM;
}
- #if SCHED_CPUMASK_ALLOC
/* get space for all scratch cpumask variables */
- allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+ sched_cpumask_alloc(&allmasks);
if (!allmasks) {
printk(KERN_WARNING "Cannot alloc cpumask array\n");
kfree(rd);
#endif
return -ENOMEM;
}
- #endif
+
tmpmask = (cpumask_t *)allmasks;
sd = &per_cpu(core_domains, i);
SD_INIT(sd, MC);
set_domain_attribute(sd, attr);
- sd->span = cpu_coregroup_map(i);
+ sd->span = *cpu_coregroup_mask(i);
cpus_and(sd->span, sd->span, *cpu_map);
sd->parent = p;
p->child = sd;
SCHED_CPUMASK_VAR(this_core_map, allmasks);
SCHED_CPUMASK_VAR(send_covered, allmasks);
- *this_core_map = cpu_coregroup_map(i);
+ *this_core_map = *cpu_coregroup_mask(i);
cpus_and(*this_core_map, *this_core_map, *cpu_map);
if (i != first_cpu(*this_core_map))
continue;
cpu_attach_domain(sd, rd, i);
}
- SCHED_CPUMASK_FREE((void *)allmasks);
+ sched_cpumask_free(allmasks);
return 0;
#ifdef CONFIG_NUMA
error:
free_sched_groups(cpu_map, tmpmask);
- SCHED_CPUMASK_FREE((void *)allmasks);
+ sched_cpumask_free(allmasks);
kfree(rd);
return -ENOMEM;
#endif
*/
static cpumask_t fallback_doms;
- void __attribute__((weak)) arch_update_cpu_topology(void)
+ /*
+ * arch_update_cpu_topology lets virtualized architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+ int __attribute__((weak)) arch_update_cpu_topology(void)
{
+ return 0;
}
/*
cpumask_t tmpmask;
int i;
- unregister_sched_domain_sysctl();
-
for_each_cpu_mask_nr(i, *cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
struct sched_domain_attr *dattr_new)
{
int i, j, n;
+ int new_topology;
mutex_lock(&sched_domains_mutex);
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
+ /* Let architecture update cpu core mappings. */
+ new_topology = arch_update_cpu_topology();
+
n = doms_new ? ndoms_new : 0;
/* Destroy deleted domains */
for (i = 0; i < ndoms_cur; i++) {
- for (j = 0; j < n; j++) {
+ for (j = 0; j < n && !new_topology; j++) {
if (cpus_equal(doms_cur[i], doms_new[j])
&& dattrs_equal(dattr_cur, i, dattr_new, j))
goto match1;
ndoms_cur = 0;
doms_new = &fallback_doms;
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
- dattr_new = NULL;
+ WARN_ON_ONCE(dattr_new);
}
/* Build new domains */
for (i = 0; i < ndoms_new; i++) {
- for (j = 0; j < ndoms_cur; j++) {
+ for (j = 0; j < ndoms_cur && !new_topology; j++) {
if (cpus_equal(doms_new[i], doms_cur[j])
&& dattrs_equal(dattr_new, i, dattr_cur, j))
goto match2;
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct cfs_rq *cfs_rq;
- struct sched_entity *se, *parent_se;
+ struct sched_entity *se;
struct rq *rq;
int i;
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
goto err;
- se = kmalloc_node(sizeof(struct sched_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ se = kzalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!se)
goto err;
- parent_se = parent ? parent->se[i] : NULL;
- init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+ init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
}
return 1;
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
struct rt_rq *rt_rq;
- struct sched_rt_entity *rt_se, *parent_se;
+ struct sched_rt_entity *rt_se;
struct rq *rq;
int i;
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- rt_rq = kmalloc_node(sizeof(struct rt_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_rq = kzalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_rq)
goto err;
- rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_se)
goto err;
- parent_se = parent ? parent->rt_se[i] : NULL;
- init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+ init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
}
return 1;
* (balbir@in.ibm.com).
*/
- /* track cpu usage of a group of tasks */
+ /* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};
struct cgroup_subsys cpuacct_subsys;
return ERR_PTR(-ENOMEM);
}
+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}
kfree(ca);
}
+ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+ {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ u64 data;
+
+ #ifndef CONFIG_64BIT
+ /*
+ * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+ */
+ spin_lock_irq(&cpu_rq(cpu)->lock);
+ data = *cpuusage;
+ spin_unlock_irq(&cpu_rq(cpu)->lock);
+ #else
+ data = *cpuusage;
+ #endif
+
+ return data;
+ }
+
+ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+ {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+
+ #ifndef CONFIG_64BIT
+ /*
+ * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+ */
+ spin_lock_irq(&cpu_rq(cpu)->lock);
+ *cpuusage = val;
+ spin_unlock_irq(&cpu_rq(cpu)->lock);
+ #else
+ *cpuusage = val;
+ #endif
+ }
+
/* return total cpu usage (in nanoseconds) of a group */
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
{
u64 totalcpuusage = 0;
int i;
- for_each_possible_cpu(i) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
-
- /*
- * Take rq->lock to make 64-bit addition safe on 32-bit
- * platforms.
- */
- spin_lock_irq(&cpu_rq(i)->lock);
- totalcpuusage += *cpuusage;
- spin_unlock_irq(&cpu_rq(i)->lock);
- }
+ for_each_present_cpu(i)
+ totalcpuusage += cpuacct_cpuusage_read(ca, i);
return totalcpuusage;
}
goto out;
}
- for_each_possible_cpu(i) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+ for_each_present_cpu(i)
+ cpuacct_cpuusage_write(ca, i, 0);
- spin_lock_irq(&cpu_rq(i)->lock);
- *cpuusage = 0;
- spin_unlock_irq(&cpu_rq(i)->lock);
- }
out:
return err;
}
+ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+ struct seq_file *m)
+ {
+ struct cpuacct *ca = cgroup_ca(cgroup);
+ u64 percpu;
+ int i;
+
+ for_each_present_cpu(i) {
+ percpu = cpuacct_cpuusage_read(ca, i);
+ seq_printf(m, "%llu ", (unsigned long long) percpu);
+ }
+ seq_printf(m, "\n");
+ return 0;
+ }
+
static struct cftype files[] = {
{
.name = "usage",
.read_u64 = cpuusage_read,
.write_u64 = cpuusage_write,
},
+ {
+ .name = "usage_percpu",
+ .read_seq_string = cpuacct_percpu_seq_read,
+ },
+
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;
if (!cpuacct_subsys.active)
return;
+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
- rq->rq_sched_info.cpu_time,
+ rq->rq_cpu_time,
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
seq_printf(seq, "\n");
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
- cpumask_scnprintf(mask_str, mask_len, sd->span);
+ cpumask_scnprintf(mask_str, mask_len, &sd->span);
seq_printf(seq, "domain%d %s", dcount++, mask_str);
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
itype++) {
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
if (rq)
- rq->rq_sched_info.cpu_time += delta;
+ rq->rq_cpu_time += delta;
}
static inline void
unsigned long long delta = task_rq(t)->clock -
t->sched_info.last_arrival;
- t->sched_info.cpu_time += delta;
rq_sched_info_depart(task_rq(t), delta);
if (t->state == TASK_RUNNING)
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/kprobes.h>
+ #include <linux/seq_file.h>
#include <linux/writeback.h>
#include <linux/stacktrace.h>
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
+ /*
+ * We need to change this state when a selftest is running.
+ * A selftest will lurk into the ring-buffer to count the
+ * entries inserted during the selftest although some concurrent
+ * insertions into the ring-buffer such as ftrace_printk could occurred
+ * at the same time, giving false positive or negative results.
+ */
+ static bool __read_mostly tracing_selftest_running;
+
+ /* For tracers that don't implement custom flags */
+ static struct tracer_opt dummy_tracer_opt[] = {
+ { }
+ };
+
+ static struct tracer_flags dummy_tracer_flags = {
+ .val = 0,
+ .opts = dummy_tracer_opt
+ };
+
+ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+ {
+ return 0;
+ }
+
+ /*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+ int tracing_disabled = 1;
+
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
- static int tracing_disabled = 1;
+ /*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+ int ftrace_dump_on_oops;
+
+ static int tracing_set_tracer(char *buf);
+
+ static int __init set_ftrace(char *str)
+ {
+ tracing_set_tracer(str);
+ return 1;
+ }
+ __setup("ftrace", set_ftrace);
+
+ static int __init set_ftrace_dump_on_oops(char *str)
+ {
+ ftrace_dump_on_oops = 1;
+ return 1;
+ }
+ __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
long
ns2usecs(cycle_t nsec)
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+ /**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if it should enable their features when starting
+ * up. See irqsoff tracer for an example (start_irqsoff_tracer).
+ */
+ int tracing_is_enabled(void)
+ {
+ return tracer_enabled;
+ }
+
/* function tracing enabled */
int ftrace_function_enabled;
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
- /* trace_flags holds iter_ctrl options */
- unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+ /* trace_flags holds trace_options default values */
+ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
+ TRACE_ITER_ANNOTATE;
/**
* trace_wake_up - wake up tasks waiting for trace input
return nsecs / 1000;
}
- /*
- * TRACE_ITER_SYM_MASK masks the options in trace_flags that
- * control the output of kernel symbols.
- */
- #define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
-
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
"stacktrace",
"sched-tree",
"ftrace_printk",
+ "ftrace_preempt",
+ "branch",
+ "annotate",
+ "userstacktrace",
+ "sym-userobj",
+ "printk-msg-only",
NULL
};
memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
data->pid = tsk->pid;
- data->uid = tsk->uid;
+ data->uid = task_uid(tsk);
data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
data->policy = tsk->policy;
data->rt_priority = tsk->rt_priority;
return trace_seq_putmem(s, hex, j);
}
+ static int
+ trace_seq_path(struct trace_seq *s, struct path *path)
+ {
+ unsigned char *p;
+
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+ if (!IS_ERR(p)) {
+ p = mangle_path(s->buffer + s->len, p, "\n");
+ if (p) {
+ s->len = p - s->buffer;
+ return 1;
+ }
+ } else {
+ s->buffer[s->len++] = '?';
+ return 1;
+ }
+
+ return 0;
+ }
+
static void
trace_seq_reset(struct trace_seq *s)
{
return -1;
}
+ /*
+ * When this gets called we hold the BKL which means that
+ * preemption is disabled. Various trace selftests however
+ * need to disable and enable preemption for successful tests.
+ * So we drop the BKL here and grab it after the tests again.
+ */
+ unlock_kernel();
mutex_lock(&trace_types_lock);
+
+ tracing_selftest_running = true;
+
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
}
}
+ if (!type->set_flag)
+ type->set_flag = &dummy_set_flag;
+ if (!type->flags)
+ type->flags = &dummy_tracer_flags;
+ else
+ if (!type->flags->opts)
+ type->flags->opts = dummy_tracer_opt;
+
#ifdef CONFIG_FTRACE_STARTUP_TEST
if (type->selftest) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
- int saved_ctrl = tr->ctrl;
int i;
+
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
current_trace = type;
- tr->ctrl = 0;
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
- tr->ctrl = saved_ctrl;
if (ret) {
printk(KERN_CONT "FAILED!\n");
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
printk(KERN_CONT "PASSED\n");
}
#endif
max_tracer_type_len = len;
out:
+ tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
+ lock_kernel();
return ret;
}
ftrace_enable_cpu();
}
+ void tracing_reset_online_cpus(struct trace_array *tr)
+ {
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+ }
+
#define SAVED_CMDLINES 128
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
cmdline_idx = 0;
}
+ static int trace_stop_count;
+ static DEFINE_SPINLOCK(tracing_start_lock);
+
+ /**
+ * ftrace_off_permanent - disable all ftrace code permanently
+ *
+ * This should only be called when a serious anomally has
+ * been detected. This will turn off the function tracing,
+ * ring buffers, and other tracing utilites. It takes no
+ * locks and can be called from any context.
+ */
+ void ftrace_off_permanent(void)
+ {
+ tracing_disabled = 1;
+ ftrace_stop();
+ tracing_off_permanent();
+ }
+
+ /**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+ void tracing_start(void)
+ {
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ if (tracing_disabled)
+ return;
+
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (--trace_stop_count)
+ goto out;
+
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ goto out;
+ }
+
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ ftrace_start();
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
+
+ /**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+ void tracing_stop(void)
+ {
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ ftrace_stop();
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (trace_stop_count++)
+ goto out;
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
spin_unlock(&trace_cmdline_lock);
}
- static char *trace_find_cmdline(int pid)
+ char *trace_find_cmdline(int pid)
{
char *cmdline = "<...>";
unsigned map;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->tgid = (tsk) ? tsk->tgid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
}
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static void __trace_graph_entry(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ent *trace,
+ unsigned long flags,
+ int pc)
+ {
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_ENT;
+ entry->graph_ent = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+
+ static void __trace_graph_return(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ret *trace,
+ unsigned long flags,
+ int pc)
+ {
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_RET;
+ entry->ret = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+ #endif
+
void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
ftrace_trace_stack(tr, data, flags, skip, preempt_count());
}
+ static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags, int pc)
+ {
+ #ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct userstack_entry *entry;
+ struct stack_trace trace;
+ unsigned long irq_flags;
+
+ if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+ return;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_USER_STACK;
+
+ memset(&entry->caller, 0, sizeof(entry->caller));
+
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = 0;
+ trace.entries = entry->caller;
+
+ save_stack_trace_user(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ #endif
+ }
+
+ void __trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags)
+ {
+ ftrace_trace_userstack(tr, data, flags, preempt_count());
+ }
+
static void
ftrace_trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
entry->arg3 = arg3;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+ ftrace_trace_userstack(tr, data, irq_flags, pc);
trace_wake_up();
}
entry->next_cpu = task_cpu(next);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 5, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
}
void
entry->next_cpu = task_cpu(wakee);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 6, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
trace_wake_up();
}
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
+ unsigned long flags;
int cpu;
int pc;
- if (tracing_disabled || !tr->ctrl)
+ if (tracing_disabled)
return;
pc = preempt_count();
- preempt_disable_notrace();
+ local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
- if (likely(!atomic_read(&data->disabled)))
+ if (likely(atomic_inc_return(&data->disabled) == 1))
ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
- preempt_enable_notrace();
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_TRACER
static void
- function_trace_call(unsigned long ip, unsigned long parent_ip)
+ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
+ static void
+ function_trace_call(unsigned long ip, unsigned long parent_ip)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+ }
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int trace_graph_entry(struct ftrace_graph_ent *trace)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (!ftrace_trace_task(current))
+ return 0;
+
+ if (!ftrace_graph_addr(trace->func))
+ return 0;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_entry(tr, data, trace, flags, pc);
+ }
+ /* Only do the atomic if it is not already set */
+ if (!test_tsk_trace_graph(current))
+ set_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+
+ return 1;
+ }
+
+ void trace_graph_return(struct ftrace_graph_ret *trace)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_return(tr, data, trace, flags, pc);
+ }
+ if (!trace->depth)
+ clear_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+ }
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
static struct ftrace_ops trace_ops __read_mostly =
{
.func = function_trace_call,
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
enum trace_file_type {
TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
};
static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
# define IP_FMT "%016lx"
#endif
- static int
+ int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
int ret;
return ret;
}
+ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags)
+ {
+ struct file *file = NULL;
+ unsigned long vmstart = 0;
+ int ret = 1;
+
+ if (mm) {
+ const struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma) {
+ file = vma->vm_file;
+ vmstart = vma->vm_start;
+ }
+ if (file) {
+ ret = trace_seq_path(s, &file->f_path);
+ if (ret)
+ ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
+ }
+ up_read(&mm->mmap_sem);
+ }
+ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+ }
+
+ static int
+ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+ unsigned long sym_flags)
+ {
+ struct mm_struct *mm = NULL;
+ int ret = 1;
+ unsigned int i;
+
+ if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ struct task_struct *task;
+ /*
+ * we do the lookup on the thread group leader,
+ * since individual threads might have already quit!
+ */
+ rcu_read_lock();
+ task = find_task_by_vpid(entry->ent.tgid);
+ if (task)
+ mm = get_task_mm(task);
+ rcu_read_unlock();
+ }
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ unsigned long ip = entry->caller[i];
+
+ if (ip == ULONG_MAX || !ret)
+ break;
+ if (i && ret)
+ ret = trace_seq_puts(s, " <- ");
+ if (!ip) {
+ if (ret)
+ ret = trace_seq_puts(s, "??");
+ continue;
+ }
+ if (!ret)
+ break;
+ if (ret)
+ ret = seq_print_user_ip(s, mm, ip, sym_flags);
+ }
+
+ if (mm)
+ mmput(mm);
+ return ret;
+ }
+
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+ static int task_state_char(unsigned long state)
+ {
+ int bit = state ? __ffs(state) + 1 : 0;
+
+ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+ }
+
/*
* The message is supposed to contain an ending newline.
* If the printing stops prematurely, try to add a newline of our own.
trace_seq_putc(s, '\n');
}
+ static void test_cpu_buff_start(struct trace_iterator *iter)
+ {
+ struct trace_seq *s = &iter->seq;
+
+ if (!(trace_flags & TRACE_ITER_ANNOTATE))
+ return;
+
+ if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
+ return;
+
+ if (cpu_isset(iter->cpu, iter->started))
+ return;
+
+ cpu_set(iter->cpu, iter->started);
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+ }
+
static enum print_line_t
print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
{
char *comm;
int S, T;
int i;
- unsigned state;
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
next_entry = find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
next_ts = iter->ts;
trace_assign_type(field, entry);
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
-
- state = field->prev_state ?
- __ffs(field->prev_state) + 1 : 0;
- S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
comm = trace_find_cmdline(field->next_pid);
trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
field->prev_pid,
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_userip_objs(field, s, sym_flags);
+ trace_seq_putc(s, '\n');
+ break;
+ }
default:
trace_seq_printf(s, "Unknown type %d\n", entry->type);
}
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
comm = trace_find_cmdline(iter->ent->pid);
t = ns2usecs(iter->ts);
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
field->prev_pid,
field->prev_prio,
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_GRAPH_RET: {
+ return print_graph_function(iter);
+ }
+ case TRACE_GRAPH_ENT: {
+ return print_graph_function(iter);
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_userip_objs(field, s, sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_putc(s, '\n');
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
}
return TRACE_TYPE_HANDLED;
}
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
field->prev_pid,
field->prev_prio,
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
trace_assign_type(field, entry);
- S = field->prev_state < sizeof(state_to_char) ?
- state_to_char[field->prev_state] : 'X';
- T = field->next_state < sizeof(state_to_char) ?
- state_to_char[field->next_state] : 'X';
- if (entry->type == TRACE_WAKE)
- S = '+';
+ T = task_state_char(field->next_state);
+ S = entry->type == TRACE_WAKE ? '+' :
+ task_state_char(field->prev_state);
SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
SEQ_PUT_HEX_FIELD_RET(s, S);
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
return TRACE_TYPE_HANDLED;
}
+ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
+ {
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *field;
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, field->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (entry->flags & TRACE_FLAG_CONT)
+ trace_seq_print_cont(s, iter);
+
+ return TRACE_TYPE_HANDLED;
+ }
+
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
return ret;
}
+ if (iter->ent->type == TRACE_PRINT &&
+ trace_flags & TRACE_ITER_PRINTK &&
+ trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ return print_printk_msg_only(iter);
+
if (trace_flags & TRACE_ITER_BIN)
return print_bin_fmt(iter);
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
}
- if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ if (iter->trace && iter->trace->print_header)
+ iter->trace->print_header(m);
+ else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return 0;
iter->trace = current_trace;
iter->pos = -1;
+ /* Notify the tracer early; before we stop tracing. */
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->tr->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
m->private = iter;
/* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
-
- if (iter->trace && iter->trace->open)
- iter->trace->open(iter);
+ tracing_stop();
mutex_unlock(&trace_types_lock);
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
mutex_lock(&tracing_cpumask_update_lock);
- len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+ len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
if (count - len < 2) {
count = -EINVAL;
goto out_err;
int err, cpu;
mutex_lock(&tracing_cpumask_update_lock);
- err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+ err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
if (err)
goto err_unlock;
- raw_local_irq_disable();
+ local_irq_disable();
__raw_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
}
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_enable();
+ local_irq_enable();
tracing_cpumask = tracing_cpumask_new;
};
static ssize_t
- tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+ tracing_trace_options_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ int i;
char *buf;
int r = 0;
int len = 0;
- int i;
+ u32 tracer_flags = current_trace->flags->val;
+ struct tracer_opt *trace_opts = current_trace->flags->opts;
+
/* calulate max size */
for (i = 0; trace_options[i]; i++) {
len += 3; /* "no" and space */
}
+ /*
+ * Increase the size with names of options specific
+ * of the current tracer.
+ */
+ for (i = 0; trace_opts[i].name; i++) {
+ len += strlen(trace_opts[i].name);
+ len += 3; /* "no" and space */
+ }
+
/* +2 for \n and \0 */
buf = kmalloc(len + 2, GFP_KERNEL);
if (!buf)
r += sprintf(buf + r, "no%s ", trace_options[i]);
}
+ for (i = 0; trace_opts[i].name; i++) {
+ if (tracer_flags & trace_opts[i].bit)
+ r += sprintf(buf + r, "%s ",
+ trace_opts[i].name);
+ else
+ r += sprintf(buf + r, "no%s ",
+ trace_opts[i].name);
+ }
+
r += sprintf(buf + r, "\n");
WARN_ON(r >= len + 2);
return r;
}
+ /* Try to assign a tracer specific option */
+ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+ {
+ struct tracer_flags *trace_flags = trace->flags;
+ struct tracer_opt *opts = NULL;
+ int ret = 0, i = 0;
+ int len;
+
+ for (i = 0; trace_flags->opts[i].name; i++) {
+ opts = &trace_flags->opts[i];
+ len = strlen(opts->name);
+
+ if (strncmp(cmp, opts->name, len) == 0) {
+ ret = trace->set_flag(trace_flags->val,
+ opts->bit, !neg);
+ break;
+ }
+ }
+ /* Not found */
+ if (!trace_flags->opts[i].name)
+ return -EINVAL;
+
+ /* Refused to handle */
+ if (ret)
+ return ret;
+
+ if (neg)
+ trace_flags->val &= ~opts->bit;
+ else
+ trace_flags->val |= opts->bit;
+
+ return 0;
+ }
+
static ssize_t
- tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
char *cmp = buf;
int neg = 0;
+ int ret;
int i;
if (cnt >= sizeof(buf))
break;
}
}
- /*
- * If no option could be set, return an error:
- */
- if (!trace_options[i])
- return -EINVAL;
+
+ /* If no option could be set, test the specific tracer options */
+ if (!trace_options[i]) {
+ ret = set_tracer_option(current_trace, cmp, neg);
+ if (ret)
+ return ret;
+ }
filp->f_pos += cnt;
static struct file_operations tracing_iter_fops = {
.open = tracing_open_generic,
- .read = tracing_iter_ctrl_read,
- .write = tracing_iter_ctrl_write,
+ .read = tracing_trace_options_read,
+ .write = tracing_trace_options_write,
};
static const char readme_msg[] =
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
- "# cat /debug/tracing/iter_ctrl\n"
+ "# cat /debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
- "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
"echo 0 > /debug/tracing/tracing_enabled\n"
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
- static ssize_t
- tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+ static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- ret = cnt;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
if (t == current_trace)
goto out;
+ trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
current_trace = t;
- if (t->init)
- t->init(tr);
+ if (t->init) {
+ ret = t->init(tr);
+ if (ret)
+ goto out;
+ }
+ trace_branch_enable(tr);
out:
mutex_unlock(&trace_types_lock);
- if (ret > 0)
- filp->f_pos += ret;
+ return ret;
+ }
+
+ static ssize_t
+ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+ {
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+ int err;
+
+ ret = cnt;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ err = tracing_set_tracer(buf);
+ if (err)
+ return err;
+
+ filp->f_pos += ret;
return ret;
}
return -ENOMEM;
mutex_lock(&trace_types_lock);
+
+ /* trace pipe does not show start of buffer */
+ cpus_setall(iter->started);
+
iter->tr = &global_trace;
iter->trace = current_trace;
filp->private_data = iter;
char buf[64];
int r;
- r = sprintf(buf, "%lu\n", tr->entries);
+ r = sprintf(buf, "%lu\n", tr->entries >> 10);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
unsigned long val;
char buf[64];
int ret, cpu;
- struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
return -EINVAL;
mutex_lock(&trace_types_lock);
- if (tr->ctrl) {
- cnt = -EBUSY;
- pr_info("ftrace: please disable tracing"
- " before modifying buffer size\n");
- goto out;
- }
+ tracing_stop();
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
atomic_inc(&max_tr.data[cpu]->disabled);
}
+ /* value is in KB */
+ val <<= 10;
+
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
atomic_dec(&max_tr.data[cpu]->disabled);
}
+ tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
int ret;
va_list args;
va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
+ ret = trace_vprintk(0, -1, fmt, args);
va_end(args);
return ret;
}
{
char *buf;
char *end;
- struct trace_array *tr = &global_trace;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled)
return -EINVAL;
if (cnt > TRACE_BUF_SIZE)
#ifdef CONFIG_DYNAMIC_FTRACE
+ int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+ {
+ return 0;
+ }
+
static ssize_t
- tracing_read_long(struct file *filp, char __user *ubuf,
+ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
- static struct file_operations tracing_read_long_fops = {
+ static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
if (!entry)
pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
- entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ entry = debugfs_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
if (!entry)
- pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+ pr_warning("Could not create debugfs 'trace_options' entry\n");
entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
pr_warning("Could not create debugfs "
"'trace_pipe' entry\n");
- entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'trace_entries' entry\n");
+ "'buffer_size_kb' entry\n");
entry = debugfs_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
return 0;
}
- int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
{
static DEFINE_SPINLOCK(trace_buf_lock);
static char trace_buf[TRACE_BUF_SIZE];
struct ring_buffer_event *event;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
- struct print_entry *entry;
- unsigned long flags, irq_flags;
int cpu, len = 0, size, pc;
+ struct print_entry *entry;
+ unsigned long irq_flags;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled || tracing_selftest_running)
return 0;
pc = preempt_count();
if (unlikely(atomic_read(&data->disabled)))
goto out;
- spin_lock_irqsave(&trace_buf_lock, flags);
+ pause_graph_tracing();
+ spin_lock_irqsave(&trace_buf_lock, irq_flags);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
len = min(len, TRACE_BUF_SIZE-1);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
+ tracing_generic_entry_update(&entry->ent, irq_flags, pc);
entry->ent.type = TRACE_PRINT;
entry->ip = ip;
+ entry->depth = depth;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
out_unlock:
- spin_unlock_irqrestore(&trace_buf_lock, flags);
-
+ spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+ unpause_graph_tracing();
out:
preempt_enable_notrace();
return 0;
va_start(ap, fmt);
- ret = trace_vprintk(ip, fmt, ap);
+ ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
va_end(ap);
return ret;
}
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
atomic_inc(&global_trace.data[cpu]->disabled);
}
+ /* don't look at user memory in panic mode */
+ trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
printk(KERN_TRACE "Dumping ftrace buffer:\n");
iter.tr = &global_trace;
#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
tracing_disabled = 0;
atomic_notifier_chain_register(&panic_notifier_list,
config LIBCRC32C
tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
+ select CRYPTO
+ select CRYPTO_CRC32C
help
This option is provided for the case where no in-kernel-tree
modules require CRC32c functions, but a module built outside the
config HAVE_LMB
boolean
+config CPUMASK_OFFSTACK
+ bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+ help
+ Use dynamic allocation for cpumask_var_t, instead of putting
+ them on the stack. This is a bit more expensive, but avoids
+ stack overflow.
+
endmenu