default y
select HAVE_IDE
select HAVE_OPROFILE
+ select INIT_ALL_POSSIBLE
config SBUS
bool
bool
default y
- config SCHED_NO_NO_OMIT_FRAME_POINTER
+ config SCHED_OMIT_FRAME_POINTER
bool
default y
early_param("noapic", parse_noapic);
struct irq_pin_list;
+
+ /*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+ struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+ };
+
+ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+ {
+ struct irq_pin_list *pin;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+ return pin;
+ }
+
struct irq_cfg {
- unsigned int irq;
struct irq_pin_list *irq_2_pin;
cpumask_t domain;
cpumask_t old_domain;
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg irq_cfgx[] = {
+ #else
static struct irq_cfg irq_cfgx[NR_IRQS] = {
- [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ #endif
+ [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
+ [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
+ [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
+ [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
+ [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
+ [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
+ [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
+ [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
+ [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
+ [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
+ [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+ [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+ [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+ [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+ [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+ [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
};
- #define for_each_irq_cfg(irq, cfg) \
- for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
-
- static struct irq_cfg *irq_cfg(unsigned int irq)
+ void __init arch_early_irq_init(void)
{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ int count;
+ int i;
+
+ cfg = irq_cfgx;
+ count = ARRAY_SIZE(irq_cfgx);
+
+ for (i = 0; i < count; i++) {
+ desc = irq_to_desc(i);
+ desc->chip_data = &cfg[i];
+ }
}
- static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return irq_cfg(irq);
+ struct irq_cfg *cfg = NULL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ if (desc)
+ cfg = desc->chip_data;
+
+ return cfg;
}
- /*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
- #define MAX_PLUS_SHARED_IRQS NR_IRQS
- #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+ {
+ struct irq_cfg *cfg;
+ int node;
- /*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+ node = cpu_to_node(cpu);
- struct irq_pin_list {
- int apic, pin;
- struct irq_pin_list *next;
- };
+ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
- static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
- static struct irq_pin_list *irq_2_pin_ptr;
+ return cfg;
+ }
- static void __init irq_2_pin_init(void)
+ void arch_init_chip_data(struct irq_desc *desc, int cpu)
{
- struct irq_pin_list *pin = irq_2_pin_head;
- int i;
-
- for (i = 1; i < PIN_MAP_SIZE; i++)
- pin[i-1].next = &pin[i];
+ struct irq_cfg *cfg;
- irq_2_pin_ptr = &pin[0];
+ cfg = desc->chip_data;
+ if (!cfg) {
+ desc->chip_data = get_one_free_irq_cfg(cpu);
+ if (!desc->chip_data) {
+ printk(KERN_ERR "can not alloc irq_cfg\n");
+ BUG_ON(1);
+ }
+ }
}
- static struct irq_pin_list *get_one_free_irq_2_pin(void)
+ #else
+ static struct irq_cfg *irq_cfg(unsigned int irq)
{
- struct irq_pin_list *pin = irq_2_pin_ptr;
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ }
- if (!pin)
- panic("can not get more irq_2_pin\n");
+ #endif
- irq_2_pin_ptr = pin->next;
- pin->next = NULL;
- return pin;
+ static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ {
}
struct io_apic {
writel(value, &io_apic->data);
}
- static bool io_apic_level_ack_pending(unsigned int irq)
+ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
- struct irq_cfg *cfg = irq_cfg(irq);
spin_lock_irqsave(&ioapic_lock, flags);
entry = cfg->irq_2_pin;
}
#ifdef CONFIG_SMP
- static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
{
int apic, pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
- cfg = irq_cfg(irq);
entry = cfg->irq_2_pin;
for (;;) {
unsigned int reg;
}
}
- static int assign_irq_vector(int irq, cpumask_t mask);
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
- static void set_ioapic_affinity_irq(unsigned int irq,
- const struct cpumask *mask)
-static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
++static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
++ const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
+ unsigned int irq;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- cfg = irq_cfg(irq);
- if (assign_irq_vector(irq, *mask))
+ irq = desc->irq;
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- set_extra_move_desc(desc, mask);
++ set_extra_move_desc(desc, *mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
/*
* Only the high 8 bits are valid.
*/
dest = SET_APIC_LOGICAL_ID(dest);
- desc = irq_to_desc(irq);
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
+ __target_IO_APIC_irq(irq, dest, cfg);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+
++static void set_ioapic_affinity_irq(unsigned int irq,
++ const struct cpumask *mask)
+ {
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ set_ioapic_affinity_irq_desc(desc, mask);
+ }
#endif /* CONFIG_SMP */
/*
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
- static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
{
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- /* first time to refer irq_cfg, so with new */
- cfg = irq_cfg_alloc(irq);
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin();
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+ apic, pin);
+ return;
+ }
cfg->irq_2_pin = entry;
entry->apic = apic;
entry->pin = pin;
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin();
+ entry->next = get_one_free_irq_2_pin(cpu);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
/*
* Reroute an IRQ to a different pin.
*/
- static void __init replace_pin_at_irq(unsigned int irq,
+ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_cfg *cfg = irq_cfg(irq);
struct irq_pin_list *entry = cfg->irq_2_pin;
int replaced = 0;
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq(irq, newapic, newpin);
+ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
}
- static inline void io_apic_modify_irq(unsigned int irq,
+ static inline void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
int pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- cfg = irq_cfg(irq);
for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
unsigned int reg;
pin = entry->pin;
}
}
- static void __unmask_IO_APIC_irq(unsigned int irq)
+ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
#ifdef CONFIG_X86_64
readl(&io_apic->data);
}
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
#else /* CONFIG_X86_32 */
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
}
- static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+ static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
IO_APIC_REDIR_MASKED, NULL);
}
- static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
#endif /* CONFIG_X86_32 */
- static void mask_IO_APIC_irq (unsigned int irq)
+ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
+ BUG_ON(!cfg);
+
spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
+ __mask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
- static void unmask_IO_APIC_irq (unsigned int irq)
+ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+ static void mask_IO_APIC_irq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ mask_IO_APIC_irq_desc(desc);
+ }
+ static void unmask_IO_APIC_irq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ unmask_IO_APIC_irq_desc(desc);
+ }
+
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
spin_unlock(&vector_lock);
}
- static int __assign_irq_vector(int irq, cpumask_t mask)
+ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
{
/*
* NOTE! The local APIC isn't very good at handling
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
unsigned int old_vector;
int cpu;
- struct irq_cfg *cfg;
- cfg = irq_cfg(irq);
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ return -EBUSY;
/* Only try and allocate irqs on cpus that are present */
cpus_and(mask, mask, cpu_online_map);
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
- return -EBUSY;
-
old_vector = cfg->vector;
if (old_vector) {
cpumask_t tmp;
return -ENOSPC;
}
- static int assign_irq_vector(int irq, cpumask_t mask)
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
{
int err;
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, mask);
+ err = __assign_irq_vector(irq, cfg, mask);
spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
- static void __clear_irq_vector(int irq)
+ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
{
cpumask_t mask;
int cpu, vector;
- cfg = irq_cfg(irq);
BUG_ON(!cfg->vector);
vector = cfg->vector;
/* This function must be called with vector_lock held */
int irq, vector;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
/* Mark the inuse vectors */
- for_each_irq_cfg(irq, cfg) {
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
if (!cpu_isset(cpu, cfg->domain))
continue;
vector = cfg->vector;
}
#endif
- static void ioapic_register_intr(int irq, unsigned long trigger)
+ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
return 0;
}
- static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
int trigger, int polarity)
{
struct irq_cfg *cfg;
if (!IO_APIC_IRQ(irq))
return;
- cfg = irq_cfg(irq);
+ cfg = desc->chip_data;
mask = TARGET_CPUS;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, cfg, mask))
return;
cpus_and(mask, cfg->domain, mask);
cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic].mp_apicid, pin);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
return;
}
- ioapic_register_intr(irq, trigger);
- if (irq < 16)
+ ioapic_register_intr(irq, desc, trigger);
+ if (irq < NR_IRQS_LEGACY)
disable_8259A_irq(irq);
ioapic_write_entry(apic, pin, entry);
{
int apic, pin, idx, irq;
int notcon = 0;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
if (multi_timer_check(apic, irq))
continue;
#endif
- add_pin_to_irq(irq, apic, pin);
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
+ }
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, apic, pin);
- setup_IO_APIC_irq(apic, pin, irq,
+ setup_IO_APIC_irq(apic, pin, irq, desc,
irq_trigger(idx), irq_polarity(idx));
}
}
union IO_APIC_reg_03 reg_03;
unsigned long flags;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
unsigned int irq;
if (apic_verbosity == APIC_QUIET)
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for_each_irq_cfg(irq, cfg) {
- struct irq_pin_list *entry = cfg->irq_2_pin;
+ for_each_irq_desc(irq, desc) {
+ struct irq_pin_list *entry;
+
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
+ entry = cfg->irq_2_pin;
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
{
int was_pending = 0;
unsigned long flags;
+ struct irq_cfg *cfg;
spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
}
- __unmask_IO_APIC_irq(irq);
+ cfg = irq_cfg(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
*/
- static void migrate_ioapic_irq(int irq, cpumask_t mask)
+ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
cpumask_t tmp, cleanup_mask;
struct irte irte;
int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
+ unsigned int irq;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
+ irq = desc->irq;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
+ __target_IO_APIC_irq(irq, dest, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
desc->affinity = mask;
}
- static int migrate_irq_remapped_level(int irq)
+ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
{
int ret = -1;
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
- if (io_apic_level_ack_pending(irq)) {
+ if (io_apic_level_ack_pending(cfg)) {
/*
* Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq(irq, desc->pending_mask);
+ migrate_ioapic_irq_desc(desc, desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
cpus_clear(desc->pending_mask);
unmask:
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
+
return ret;
}
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
continue;
}
- desc->chip->set_affinity(irq, desc->pending_mask);
+ desc->chip->set_affinity(irq, &desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
/*
* Migrates the IRQ destination in the process context.
*/
- static void set_ir_ioapic_affinity_irq(unsigned int irq,
- const struct cpumask *mask)
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
++static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
++ const struct cpumask *mask)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = mask;
+ cpumask_copy(&desc->pending_mask, mask);
- migrate_irq_remapped_level(irq);
+ migrate_irq_remapped_level_desc(desc);
return;
}
- migrate_ioapic_irq(irq, *mask);
+ migrate_ioapic_irq_desc(desc, mask);
+ }
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
++static void set_ir_ioapic_affinity_irq(unsigned int irq,
++ const struct cpumask *mask)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#endif
struct irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
+ if (irq == -1)
+ continue;
+
desc = irq_to_desc(irq);
if (!desc)
continue;
irq_exit();
}
- static void irq_complete_move(unsigned int irq)
+ static void irq_complete_move(struct irq_desc **descp)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_desc *desc = *descp;
+ struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
if (likely(!cfg->move_in_progress))
}
}
#else
- static inline void irq_complete_move(unsigned int irq) {}
+ static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
+
#ifdef CONFIG_INTR_REMAP
static void ack_x2apic_level(unsigned int irq)
{
{
ack_x2APIC_irq();
}
+
#endif
static void ack_apic_edge(unsigned int irq)
{
- irq_complete_move(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ irq_complete_move(&desc);
move_native_irq(irq);
ack_APIC_irq();
}
static void ack_apic_level(unsigned int irq)
{
+ struct irq_desc *desc = irq_to_desc(irq);
+
#ifdef CONFIG_X86_32
unsigned long v;
int i;
#endif
+ struct irq_cfg *cfg;
int do_unmask_irq = 0;
- irq_complete_move(irq);
+ irq_complete_move(&desc);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
}
#endif
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
- i = irq_cfg(irq)->vector;
+ cfg = desc->chip_data;
+ i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
#endif
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- if (!io_apic_level_ack_pending(irq))
+ cfg = desc->chip_data;
+ if (!io_apic_level_ack_pending(cfg))
move_masked_irq(irq);
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
}
#ifdef CONFIG_X86_32
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ __mask_and_edge_IO_APIC_irq(cfg);
+ __unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
#endif
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for_each_irq_cfg(irq, cfg) {
- if (IO_APIC_IRQ(irq) && !cfg->vector) {
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
+ cfg = desc->chip_data;
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < 16)
+ if (irq < NR_IRQS_LEGACY)
make_8259A_irq(irq);
- else {
- desc = irq_to_desc(irq);
+ else
/* Strange. Oh, well.. */
desc->chip = &no_irq_chip;
- }
}
}
}
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
- static void ack_lapic_irq (unsigned int irq)
+ static void ack_lapic_irq(unsigned int irq)
{
ack_APIC_irq();
}
.ack = ack_lapic_irq,
};
- static void lapic_register_intr(int irq)
+ static void lapic_register_intr(int irq, struct irq_desc *desc)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_cfg(0);
+ struct irq_desc *desc = irq_to_desc(0);
+ struct irq_cfg *cfg = desc->chip_data;
+ int cpu = boot_cpu_id;
int apic1, pin1, apic2, pin2;
unsigned long flags;
unsigned int ver;
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- assign_irq_vector(0, TARGET_CPUS);
+ assign_irq_vector(0, cfg, TARGET_CPUS);
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq(0, apic1, pin1);
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
}
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
enable_8259A_irq(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0);
+ lapic_register_intr(0, desc);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
unsigned int irq;
unsigned int new;
unsigned long flags;
- struct irq_cfg *cfg_new;
-
- irq_want = nr_irqs - 1;
+ struct irq_cfg *cfg_new = NULL;
+ int cpu = boot_cpu_id;
+ struct irq_desc *desc_new = NULL;
irq = 0;
spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new > 0; new--) {
+ for (new = irq_want; new < NR_IRQS; new++) {
if (platform_legacy_irq(new))
continue;
- cfg_new = irq_cfg(new);
- if (cfg_new && cfg_new->vector != 0)
+
+ desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ if (!desc_new) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", new);
+ continue;
+ }
+ cfg_new = desc_new->chip_data;
+
+ if (cfg_new->vector != 0)
continue;
- /* check if need to create one */
- if (!cfg_new)
- cfg_new = irq_cfg_alloc(new);
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
irq = new;
break;
}
if (irq > 0) {
dynamic_irq_init(irq);
+ /* restore it, in case dynamic_irq_init clear it */
+ if (desc_new)
+ desc_new->chip_data = cfg_new;
}
return irq;
}
+ static int nr_irqs_gsi = NR_IRQS_LEGACY;
int create_irq(void)
{
+ unsigned int irq_want;
int irq;
- irq = create_irq_nr(nr_irqs - 1);
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want);
if (irq == 0)
irq = -1;
void destroy_irq(unsigned int irq)
{
unsigned long flags;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ /* store it, in case dynamic_irq_cleanup clear it */
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
+ /* connect back irq_cfg */
+ if (desc)
+ desc->chip_data = cfg;
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags);
}
unsigned dest;
cpumask_t tmp;
+ cfg = irq_cfg(irq);
tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ err = assign_irq_vector(irq, cfg, tmp);
if (err)
return err;
- cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, *mask))
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- cfg = irq_cfg(irq);
- set_extra_move_desc(desc, mask);
++ set_extra_move_desc(desc, *mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
- read_msi_msg(irq, &msg);
+ read_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg(irq, &msg);
- desc = irq_to_desc(irq);
+ write_msi_msg_desc(desc, &msg);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
-
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+ const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp, cleanup_mask;
struct irte irte;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, *mask))
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- cfg = irq_cfg(irq);
+ set_extra_move_desc(desc, mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
irte.vector = cfg->vector;
cfg->move_in_progress = 0;
}
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
+
#endif
#endif /* CONFIG_SMP */
}
#endif
- static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
int ret;
struct msi_msg msg;
if (ret < 0)
return ret;
- set_irq_msi(irq, desc);
+ set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
return 0;
}
- static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
- {
- unsigned int irq;
-
- irq = dev->bus->number;
- irq <<= 8;
- irq |= dev->devfn;
- irq <<= 12;
-
- return irq;
- }
-
- int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
{
unsigned int irq;
int ret;
unsigned int irq_want;
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+ irq_want = nr_irqs_gsi;
irq = create_irq_nr(irq_want);
if (irq == 0)
return -1;
goto error;
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
{
unsigned int irq;
int ret, sub_handle;
- struct msi_desc *desc;
+ struct msi_desc *msidesc;
unsigned int irq_want;
#ifdef CONFIG_INTR_REMAP
int index = 0;
#endif
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
+ irq_want = nr_irqs_gsi;
sub_handle = 0;
- list_for_each_entry(desc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want--);
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = create_irq_nr(irq_want);
+ irq_want++;
if (irq == 0)
return -1;
#ifdef CONFIG_INTR_REMAP
}
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0)
goto error;
sub_handle++;
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, *mask))
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- cfg = irq_cfg(irq);
- set_extra_move_desc(desc, mask);
++ set_extra_move_desc(desc, *mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
dmar_msi_read(irq, &msg);
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
+
#endif /* CONFIG_SMP */
struct irq_chip dmar_msi_type = {
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
- struct irq_desc *desc;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, *mask))
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- cfg = irq_cfg(irq);
- set_extra_move_desc(desc, mask);
++ set_extra_move_desc(desc, *mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
hpet_msi_read(irq, &msg);
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
+
#endif /* CONFIG_SMP */
struct irq_chip hpet_msi_type = {
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
- if (assign_irq_vector(irq, *mask))
+ cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
++ if (assign_irq_vector(irq, cfg, *mask))
return;
- cfg = irq_cfg(irq);
- set_extra_move_desc(desc, mask);
++ set_extra_move_desc(desc, *mask);
+
- cpus_and(tmp, cfg->domain, mask);
+ cpumask_and(&tmp, &cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
target_ht_irq(irq, dest, cfg->vector);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
+
#endif
static struct irq_chip ht_irq_chip = {
int err;
cpumask_t tmp;
+ cfg = irq_cfg(irq);
tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ err = assign_irq_vector(irq, cfg, tmp);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
- cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
unsigned long flags;
int err;
- err = assign_irq_vector(irq, *eligible_cpu);
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, *eligible_cpu);
if (err != 0)
return err;
irq_name);
spin_unlock_irqrestore(&vector_lock, flags);
- cfg = irq_cfg(irq);
-
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
return reg_01.bits.entries;
}
- int __init probe_nr_irqs(void)
+ void __init probe_nr_irqs_gsi(void)
{
- return NR_IRQS;
+ int idx;
+ int nr = 0;
+
+ for (idx = 0; idx < nr_ioapics; idx++)
+ nr += io_apic_get_redir_entries(idx) + 1;
+
+ if (nr > nr_irqs_gsi)
+ nr_irqs_gsi = nr;
}
/* --------------------------------------------------------------------------
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
+
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
ioapic);
return -EINVAL;
}
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+ }
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
return 0;
}
* when you have too many devices, because at that time only boot
* cpu is online.
*/
- cfg = irq_cfg(irq);
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
if (!cfg->vector) {
- setup_IO_APIC_irq(ioapic, pin, irq,
+ setup_IO_APIC_irq(ioapic, pin, irq, desc,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
continue;
/*
* Honour affinities which have been set in early boot
*/
- desc = irq_to_desc(irq);
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
mask = desc->affinity;
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, &mask);
- set_ir_ioapic_affinity_irq_desc(desc, mask);
++ set_ir_ioapic_affinity_irq_desc(desc, &mask);
else
#endif
- set_ioapic_affinity_irq(irq, &mask);
- set_ioapic_affinity_irq_desc(desc, mask);
++ set_ioapic_affinity_irq_desc(desc, &mask);
}
}
struct resource *ioapic_res;
int i;
- irq_2_pin_init();
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
for_each_irq_desc(irq, desc) {
cpumask_t mask;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
mask = map;
}
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, &mask);
else if (desc->action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
int break_affinity = 0;
int set_affinity = 1;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
desc->chip->mask(irq);
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, &mask);
else if (!(warned++))
set_affinity = 0;
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
+ #include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
pr_debug("Before bogocount - setting activated=1.\n");
}
- static inline void __inquire_remote_apic(int apicid)
+ void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
}
}
- #ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
- static int __devinit
- wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
return (send_status | accept_status);
}
- #endif /* WAKE_SECONDARY_VIA_NMI */
- #ifdef WAKE_SECONDARY_VIA_INIT
- static int __devinit
- wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+ int __devinit
+ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
return (send_status | accept_status);
}
- #endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;
int i;
/* By default all event channels notify CPU#0. */
- for_each_irq_desc(i, desc)
+ for_each_irq_desc(i, desc) {
+ if (!desc)
+ continue;
+
desc->affinity = cpumask_of_cpu(0);
+ }
#endif
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
int irq;
/* Only allocate from dynirq range */
- for_each_irq_nr(irq)
+ for (irq = 0; irq < nr_irqs; irq++)
if (irq_bindcount[irq] == 0)
break;
spin_unlock(&irq_mapping_update_lock);
/* new event channels are always bound to cpu 0 */
- irq_set_affinity(irq, cpumask_of_cpu(0));
+ irq_set_affinity(irq, cpumask_of(0));
/* Unmask the event channel. */
enable_irq(irq);
}
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
+static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
{
- unsigned tcpu = first_cpu(dest);
+ unsigned tcpu = cpumask_first(dest);
rebind_irq_to_cpu(irq, tcpu);
}
mask_evtchn(evtchn);
/* No IRQ <-> event-channel mappings. */
- for_each_irq_nr(irq)
+ for (irq = 0; irq < nr_irqs; irq++)
irq_info[irq].evtchn = 0; /* zap event-channel binding */
for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
mask_evtchn(i);
/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
- for_each_irq_nr(i)
+ for (i = 0; i < nr_irqs; i++)
irq_bindcount[i] = 0;
irq_ctx_init(smp_processor_id());
#include <linux/irqflags.h>
#include <linux/smp.h>
#include <linux/percpu.h>
+ #include <linux/irqnr.h>
+
#include <asm/atomic.h>
#include <asm/ptrace.h>
#include <asm/system.h>
extern cpumask_t irq_default_affinity;
-extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
extern int irq_can_set_affinity(unsigned int irq);
extern int irq_select_affinity(unsigned int irq);
#else /* CONFIG_SMP */
-static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
{
return -EINVAL;
}
void (*eoi)(unsigned int irq);
void (*end)(unsigned int irq);
- void (*set_affinity)(unsigned int irq, cpumask_t dest);
+ void (*set_affinity)(unsigned int irq,
+ const struct cpumask *dest);
int (*retrigger)(unsigned int irq);
int (*set_type)(unsigned int irq, unsigned int flow_type);
int (*set_wake)(unsigned int irq, unsigned int on);
const char *typename;
};
+ struct timer_rand_state;
+ struct irq_2_iommu;
/**
* struct irq_desc - interrupt descriptor
* @irq: interrupt number for this descriptor
*/
struct irq_desc {
unsigned int irq;
+ #ifdef CONFIG_SPARSE_IRQ
+ struct timer_rand_state *timer_rand_state;
+ unsigned int *kstat_irqs;
+ # ifdef CONFIG_INTR_REMAP
+ struct irq_2_iommu *irq_2_iommu;
+ # endif
+ #endif
irq_flow_handler_t handle_irq;
struct irq_chip *chip;
struct msi_desc *msi_desc;
const char *name;
} ____cacheline_internodealigned_in_smp;
+ extern void early_irq_init(void);
+ extern void arch_early_irq_init(void);
+ extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+ extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu);
+ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
+ #ifndef CONFIG_SPARSE_IRQ
extern struct irq_desc irq_desc[NR_IRQS];
static inline struct irq_desc *irq_to_desc(unsigned int irq)
{
- return (irq < nr_irqs) ? irq_desc + irq : NULL;
+ return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+ }
+ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+ {
+ return irq_to_desc(irq);
}
+ #else
+
+ extern struct irq_desc *irq_to_desc(unsigned int irq);
+ extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+
+ # define for_each_irq_desc(irq, desc) \
+ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
+ # define for_each_irq_desc_reverse(irq, desc) \
+ for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
+
+ #define kstat_irqs_this_cpu(DESC) \
+ ((DESC)->kstat_irqs[smp_processor_id()])
+ #define kstat_incr_irqs_this_cpu(irqno, DESC) \
+ ((DESC)->kstat_irqs[smp_processor_id()]++)
+
+ #endif
+
/*
* Migration helpers for obsolete names, they will go away:
*/
#define get_irq_data(irq) (irq_to_desc(irq)->handler_data)
#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc)
+ #define get_irq_desc_chip(desc) ((desc)->chip)
+ #define get_irq_desc_chip_data(desc) ((desc)->chip_data)
+ #define get_irq_desc_data(desc) ((desc)->handler_data)
+ #define get_irq_desc_msi(desc) ((desc)->msi_desc)
+
#endif /* CONFIG_GENERIC_HARDIRQS */
#endif /* !CONFIG_S390 */
config MARKERS
bool "Activate markers"
+ depends on TRACEPOINTS
help
Place an empty function call at each marker site. Can be
dynamically changed for a probe function.
endif # MODULES
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_map and
+ cpu_possible_map, some of them chose to initialize cpu_possible_map
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers persuing me down dark alleys.
+
config STOP_MACHINE
bool
default y
*/
void dynamic_irq_init(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_desc *desc;
unsigned long flags;
+ desc = irq_to_desc(irq);
if (!desc) {
WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
return;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
- cpus_setall(desc->affinity);
+ cpumask_setall(&desc->affinity);
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
- cpumask_t new_value;
+ cpumask_var_t new_value;
int err;
if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
return -EIO;
+ if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
err = cpumask_parse_user(buffer, count, new_value);
if (err)
- return err;
+ goto free_cpumask;
- if (!is_affinity_mask_valid(new_value))
- return -EINVAL;
+ if (!is_affinity_mask_valid(*new_value)) {
+ err = -EINVAL;
+ goto free_cpumask;
+ }
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
- if (!cpus_intersects(new_value, cpu_online_map))
+ if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- return irq_select_affinity_usr(irq) ? -EINVAL : count;
-
- irq_set_affinity(irq, new_value);
-
- return count;
+ err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+ } else {
+ irq_set_affinity(irq, new_value);
+ err = count;
+ }
+
+free_cpumask:
+ free_cpumask_var(new_value);
+ return err;
}
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
cpumask_t new_value;
int err;
- err = cpumask_parse_user(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, &new_value);
if (err)
return err;
/*
* Create entries for all existing IRQs.
*/
- for_each_irq_desc(irq, desc)
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
register_irq_proc(irq, desc);
+ }
}
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+ int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
unsigned long full_count = count, err;
cpumask_t new_value;
- err = cpumask_parse_user(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, &new_value);
if (err)
return err;
};
#ifdef CONFIG_SMP
- static inline void profile_nop(void *unused)
+ static void profile_nop(void *unused)
{
}
*/
#define RUNTIME_INF ((u64)~0ULL)
+ DEFINE_TRACE(sched_wait_task);
+ DEFINE_TRACE(sched_wakeup);
+ DEFINE_TRACE(sched_wakeup_new);
+ DEFINE_TRACE(sched_switch);
+ DEFINE_TRACE(sched_migrate_task);
+
#ifdef CONFIG_SMP
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
struct cgroup_subsys_state css;
#endif
+ #ifdef CONFIG_USER_SCHED
+ uid_t uid;
+ #endif
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
struct sched_entity **se;
#ifdef CONFIG_USER_SCHED
+ /* Helper function to pass uid information to create_sched_user() */
+ void set_tg_uid(struct user_struct *user)
+ {
+ user->tg->uid = user->uid;
+ }
+
/*
* Root task group.
* Every UID task group (including init_task_group aka UID-0) will
*/
struct root_domain {
atomic_t refcount;
- cpumask_t span;
- cpumask_t online;
+ cpumask_var_t span;
+ cpumask_var_t online;
/*
* The "RT overload" flag: it gets set if a CPU has more than
* one runnable RT task.
*/
- cpumask_t rto_mask;
+ cpumask_var_t rto_mask;
atomic_t rto_count;
#ifdef CONFIG_SMP
struct cpupri cpupri;
#undef SCHED_FEAT
- static int sched_feat_open(struct inode *inode, struct file *filp)
+ static int sched_feat_show(struct seq_file *m, void *v)
{
int i;
for (i = 0; sched_feat_names[i]; i++) {
- len += strlen(sched_feat_names[i]);
- len += 4;
- }
-
- buf = kmalloc(len + 2, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- for (i = 0; sched_feat_names[i]; i++) {
- if (sysctl_sched_features & (1UL << i))
- r += sprintf(buf + r, "%s ", sched_feat_names[i]);
- else
- r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+ if (!(sysctl_sched_features & (1UL << i)))
+ seq_puts(m, "NO_");
+ seq_printf(m, "%s ", sched_feat_names[i]);
}
+ seq_puts(m, "\n");
- r += sprintf(buf + r, "\n");
- WARN_ON(r >= len + 2);
-
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
- kfree(buf);
-
- return r;
+ return 0;
}
static ssize_t
return cnt;
}
+ static int sched_feat_open(struct inode *inode, struct file *filp)
+ {
+ return single_open(filp, sched_feat_show, NULL);
+ }
+
static struct file_operations sched_feat_fops = {
- .open = sched_feat_open,
- .read = sched_feat_read,
- .write = sched_feat_write,
+ .open = sched_feat_open,
+ .write = sched_feat_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
static __init int sched_init_debug(void)
update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long sd_shares, unsigned long sd_rq_weight)
{
- int boost = 0;
unsigned long shares;
unsigned long rq_weight;
if (!tg->se[cpu])
return;
- rq_weight = tg->cfs_rq[cpu]->load.weight;
-
- /*
- * If there are currently no tasks on the cpu pretend there is one of
- * average load so that when a new task gets to run here it will not
- * get delayed by group starvation.
- */
- if (!rq_weight) {
- boost = 1;
- rq_weight = NICE_0_LOAD;
- }
-
- if (unlikely(rq_weight > sd_rq_weight))
- rq_weight = sd_rq_weight;
+ rq_weight = tg->cfs_rq[cpu]->rq_weight;
/*
* \Sum shares * rq_weight
* \Sum rq_weight
*
*/
- shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+ shares = (sd_shares * rq_weight) / sd_rq_weight;
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
if (abs(shares - tg->se[cpu]->load.weight) >
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
- /*
- * record the actual number of shares, not the boosted amount.
- */
- tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
- tg->cfs_rq[cpu]->rq_weight = rq_weight;
+ tg->cfs_rq[cpu]->shares = shares;
__set_se_shares(tg->se[cpu], shares);
spin_unlock_irqrestore(&rq->lock, flags);
*/
static int tg_shares_up(struct task_group *tg, void *data)
{
- unsigned long rq_weight = 0;
+ unsigned long weight, rq_weight = 0;
unsigned long shares = 0;
struct sched_domain *sd = data;
int i;
- for_each_cpu_mask(i, sd->span) {
- rq_weight += tg->cfs_rq[i]->load.weight;
+ for_each_cpu(i, sched_domain_span(sd)) {
+ /*
+ * If there are currently no tasks on the cpu pretend there
+ * is one of average load so that when a new task gets to
+ * run here it will not get delayed by group starvation.
+ */
+ weight = tg->cfs_rq[i]->load.weight;
+ if (!weight)
+ weight = NICE_0_LOAD;
+
+ tg->cfs_rq[i]->rq_weight = weight;
+ rq_weight += weight;
shares += tg->cfs_rq[i]->shares;
}
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
shares = tg->shares;
- if (!rq_weight)
- rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
- for_each_cpu_mask(i, sd->span)
+ for_each_cpu(i, sched_domain_span(sd))
update_group_shares_cpu(tg, i, shares, rq_weight);
return 0;
#endif
+ /*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(this_rq->lock)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+ {
+ int ret = 0;
+
+ if (unlikely(!irqs_disabled())) {
+ /* printk() doesn't work good under rq->lock */
+ spin_unlock(&this_rq->lock);
+ BUG_ON(1);
+ }
+ if (unlikely(!spin_trylock(&busiest->lock))) {
+ if (busiest < this_rq) {
+ spin_unlock(&this_rq->lock);
+ spin_lock(&busiest->lock);
+ spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+ ret = 1;
+ } else
+ spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+ }
+ return ret;
+ }
+
+ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(busiest->lock)
+ {
+ spin_unlock(&busiest->lock);
+ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+ }
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
int i;
/* Skip over this group if it has no CPUs allowed */
- if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+ if (!cpumask_intersects(sched_group_cpus(group),
+ &p->cpus_allowed))
continue;
- local_group = cpu_isset(this_cpu, group->cpumask);
+ local_group = cpumask_test_cpu(this_cpu,
+ sched_group_cpus(group));
/* Tally up the load of all CPUs in the group */
avg_load = 0;
- for_each_cpu_mask_nr(i, group->cpumask) {
+ for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
if (local_group)
load = source_load(i, load_idx);
* find_idlest_cpu - find the idlest cpu among the cpus in group.
*/
static int
- find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
- cpumask_t *tmp)
+ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
unsigned long load, min_load = ULONG_MAX;
int idlest = -1;
int i;
/* Traverse only the allowed CPUs */
- cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-
- for_each_cpu_mask_nr(i, *tmp) {
+ for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
load = weighted_cpuload(i);
if (load < min_load || (load == min_load && i == this_cpu)) {
update_shares(sd);
while (sd) {
- cpumask_t span, tmpmask;
struct sched_group *group;
int new_cpu, weight;
continue;
}
- span = sd->span;
group = find_idlest_group(sd, t, cpu);
if (!group) {
sd = sd->child;
continue;
}
- new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+ new_cpu = find_idlest_cpu(group, t, cpu);
if (new_cpu == -1 || new_cpu == cpu) {
/* Now try balancing at a lower domain level of cpu */
sd = sd->child;
/* Now try balancing at a lower domain level of new_cpu */
cpu = new_cpu;
+ weight = cpumask_weight(sched_domain_span(sd));
sd = NULL;
- weight = cpus_weight(span);
for_each_domain(cpu, tmp) {
- if (weight <= cpus_weight(tmp->span))
+ if (weight <= cpumask_weight(sched_domain_span(tmp)))
break;
if (tmp->flags & flag)
sd = tmp;
cpu = task_cpu(p);
for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
update_shares(sd);
break;
}
else {
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
}
/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
- static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(this_rq->lock)
- __acquires(busiest->lock)
- __acquires(this_rq->lock)
- {
- int ret = 0;
-
- if (unlikely(!irqs_disabled())) {
- /* printk() doesn't work good under rq->lock */
- spin_unlock(&this_rq->lock);
- BUG_ON(1);
- }
- if (unlikely(!spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
- spin_unlock(&this_rq->lock);
- spin_lock(&busiest->lock);
- spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
- ret = 1;
- } else
- spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
- }
- return ret;
- }
-
- static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
- __releases(busiest->lock)
- {
- spin_unlock(&busiest->lock);
- lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
- }
-
- /*
* If dest_cpu is allowed for this process, migrate the task to it.
* This is accomplished by forcing the cpu_allowed mask to only
* allow dest_cpu, which will force the cpu onto dest_cpu. Then
struct rq *rq;
rq = task_rq_lock(p, &flags);
- if (!cpu_isset(dest_cpu, p->cpus_allowed)
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
|| unlikely(!cpu_active(dest_cpu)))
goto out;
* 2) cannot be migrated to this CPU due to cpus_allowed, or
* 3) are cache-hot on their current CPU.
*/
- if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
schedstat_inc(p, se.nr_failed_migrations_affine);
return 0;
}
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long *imbalance, enum cpu_idle_type idle,
- int *sd_idle, const cpumask_t *cpus, int *balance)
+ int *sd_idle, const struct cpumask *cpus, int *balance)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
unsigned long sum_avg_load_per_task;
unsigned long avg_load_per_task;
- local_group = cpu_isset(this_cpu, group->cpumask);
+ local_group = cpumask_test_cpu(this_cpu,
+ sched_group_cpus(group));
if (local_group)
- balance_cpu = first_cpu(group->cpumask);
+ balance_cpu = cpumask_first(sched_group_cpus(group));
/* Tally up the load of all CPUs in the group */
sum_weighted_load = sum_nr_running = avg_load = 0;
max_cpu_load = 0;
min_cpu_load = ~0UL;
- for_each_cpu_mask_nr(i, group->cpumask) {
- struct rq *rq;
-
- if (!cpu_isset(i, *cpus))
- continue;
-
- rq = cpu_rq(i);
+ for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+ struct rq *rq = cpu_rq(i);
if (*sd_idle && rq->nr_running)
*sd_idle = 0;
*/
if ((sum_nr_running < min_nr_running) ||
(sum_nr_running == min_nr_running &&
- first_cpu(group->cpumask) <
- first_cpu(group_min->cpumask))) {
+ cpumask_first(sched_group_cpus(group)) <
+ cpumask_first(sched_group_cpus(group_min)))) {
group_min = group;
min_nr_running = sum_nr_running;
min_load_per_task = sum_weighted_load /
if (sum_nr_running <= group_capacity - 1) {
if (sum_nr_running > leader_nr_running ||
(sum_nr_running == leader_nr_running &&
- first_cpu(group->cpumask) >
- first_cpu(group_leader->cpumask))) {
+ cpumask_first(sched_group_cpus(group)) >
+ cpumask_first(sched_group_cpus(group_leader)))) {
group_leader = group;
leader_nr_running = sum_nr_running;
}
*/
static struct rq *
find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
- unsigned long imbalance, const cpumask_t *cpus)
+ unsigned long imbalance, const struct cpumask *cpus)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
int i;
- for_each_cpu_mask_nr(i, group->cpumask) {
+ for_each_cpu(i, sched_group_cpus(group)) {
unsigned long wl;
- if (!cpu_isset(i, *cpus))
+ if (!cpumask_test_cpu(i, cpus))
continue;
rq = cpu_rq(i);
*/
static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *balance, cpumask_t *cpus)
+ int *balance, struct cpumask *cpus)
{
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
struct rq *busiest;
unsigned long flags;
- cpus_setall(*cpus);
+ cpumask_setall(cpus);
/*
* When power savings policy is enabled for the parent domain, idle
/* All tasks on this runqueue were pinned by CPU affinity */
if (unlikely(all_pinned)) {
- cpu_clear(cpu_of(busiest), *cpus);
- if (!cpus_empty(*cpus))
+ cpumask_clear_cpu(cpu_of(busiest), cpus);
+ if (!cpumask_empty(cpus))
goto redo;
goto out_balanced;
}
/* don't kick the migration_thread, if the curr
* task on busiest cpu can't be moved to this_cpu
*/
- if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu,
+ &busiest->curr->cpus_allowed)) {
spin_unlock_irqrestore(&busiest->lock, flags);
all_pinned = 1;
goto out_one_pinned;
*/
static int
load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
- cpumask_t *cpus)
+ struct cpumask *cpus)
{
struct sched_group *group;
struct rq *busiest = NULL;
int sd_idle = 0;
int all_pinned = 0;
- cpus_setall(*cpus);
+ cpumask_setall(cpus);
/*
* When power savings policy is enabled for the parent domain, idle
double_unlock_balance(this_rq, busiest);
if (unlikely(all_pinned)) {
- cpu_clear(cpu_of(busiest), *cpus);
- if (!cpus_empty(*cpus))
+ cpumask_clear_cpu(cpu_of(busiest), cpus);
+ if (!cpumask_empty(cpus))
goto redo;
}
}
static void idle_balance(int this_cpu, struct rq *this_rq)
{
struct sched_domain *sd;
- int pulled_task = -1;
+ int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
- cpumask_t tmpmask;
+ cpumask_var_t tmpmask;
+
+ if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
+ return;
for_each_domain(this_cpu, sd) {
unsigned long interval;
if (sd->flags & SD_BALANCE_NEWIDLE)
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance_newidle(this_cpu, this_rq,
- sd, &tmpmask);
+ sd, tmpmask);
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
*/
this_rq->next_balance = next_balance;
}
+ free_cpumask_var(tmpmask);
}
/*
/* Search for an sd spanning us and the target CPU. */
for_each_domain(target_cpu, sd) {
if ((sd->flags & SD_LOAD_BALANCE) &&
- cpu_isset(busiest_cpu, sd->span))
+ cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
break;
}
#ifdef CONFIG_NO_HZ
static struct {
atomic_t load_balancer;
- cpumask_t cpu_mask;
+ cpumask_var_t cpu_mask;
} nohz ____cacheline_aligned = {
.load_balancer = ATOMIC_INIT(-1),
- .cpu_mask = CPU_MASK_NONE,
};
/*
int cpu = smp_processor_id();
if (stop_tick) {
- cpu_set(cpu, nohz.cpu_mask);
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
/*
}
/* time for ilb owner also to sleep */
- if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+ if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
atomic_set(&nohz.load_balancer, -1);
return 0;
} else if (atomic_read(&nohz.load_balancer) == cpu)
return 1;
} else {
- if (!cpu_isset(cpu, nohz.cpu_mask))
+ if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
return 0;
- cpu_clear(cpu, nohz.cpu_mask);
+ cpumask_clear_cpu(cpu, nohz.cpu_mask);
if (atomic_read(&nohz.load_balancer) == cpu)
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
unsigned long next_balance = jiffies + 60*HZ;
int update_next_balance = 0;
int need_serialize;
- cpumask_t tmp;
+ cpumask_var_t tmp;
+
+ /* Fails alloc? Rebalancing probably not a priority right now. */
+ if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
+ return;
for_each_domain(cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE))
}
if (time_after_eq(jiffies, sd->last_balance + interval)) {
- if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
+ if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
/*
* We've pulled tasks over so either we're no
* longer idle, or one of our SMT siblings is
*/
if (likely(update_next_balance))
rq->next_balance = next_balance;
+
+ free_cpumask_var(tmp);
}
/*
*/
if (this_rq->idle_at_tick &&
atomic_read(&nohz.load_balancer) == this_cpu) {
struct rq *rq;
int balance_cpu;
- cpu_clear(this_cpu, cpus);
- for_each_cpu_mask_nr(balance_cpu, cpus) {
+ for_each_cpu(balance_cpu, nohz.cpu_mask) {
+ if (balance_cpu == this_cpu)
+ continue;
+
/*
* If this cpu gets work to do, stop the load balancing
* work being done for other cpus. Next load
rq->in_nohz_recently = 0;
if (atomic_read(&nohz.load_balancer) == cpu) {
- cpu_clear(cpu, nohz.cpu_mask);
+ cpumask_clear_cpu(cpu, nohz.cpu_mask);
atomic_set(&nohz.load_balancer, -1);
}
* TBD: Traverse the sched domains and nominate
* the nearest cpu in the nohz.cpu_mask.
*/
- int ilb = first_cpu(nohz.cpu_mask);
+ int ilb = cpumask_first(nohz.cpu_mask);
if (ilb < nr_cpu_ids)
resched_cpu(ilb);
* cpus with ticks stopped, is it time for that to stop?
*/
if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
- cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+ cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
resched_cpu(cpu);
return;
}
* someone else, then no need raise the SCHED_SOFTIRQ
*/
if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
- cpu_isset(cpu, nohz.cpu_mask))
+ cpumask_test_cpu(cpu, nohz.cpu_mask))
return;
#endif
if (time_after_eq(jiffies, rq->next_balance))
if (p == rq->idle) {
p->stime = cputime_add(p->stime, steal);
- account_group_system_time(p, steal);
if (atomic_read(&rq->nr_iowait) > 0)
cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
else
/*
* Underflow?
*/
- if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
return;
/*
* Is the spinlock portion underflowing?
return retval;
}
- long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
+ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
- cpumask_t cpus_allowed;
- cpumask_t new_mask = *in_mask;
+ cpumask_var_t cpus_allowed, new_mask;
struct task_struct *p;
int retval;
get_task_struct(p);
read_unlock(&tasklist_lock);
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ retval = -ENOMEM;
+ goto out_put_task;
+ }
+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+ retval = -ENOMEM;
+ goto out_free_cpus_allowed;
+ }
retval = -EPERM;
if ((current->euid != p->euid) && (current->euid != p->uid) &&
!capable(CAP_SYS_NICE))
if (retval)
goto out_unlock;
- cpuset_cpus_allowed(p, &cpus_allowed);
- cpus_and(new_mask, new_mask, cpus_allowed);
+ cpuset_cpus_allowed(p, cpus_allowed);
+ cpumask_and(new_mask, in_mask, cpus_allowed);
again:
- retval = set_cpus_allowed_ptr(p, &new_mask);
+ retval = set_cpus_allowed_ptr(p, new_mask);
if (!retval) {
- cpuset_cpus_allowed(p, &cpus_allowed);
- if (!cpus_subset(new_mask, cpus_allowed)) {
+ cpuset_cpus_allowed(p, cpus_allowed);
+ if (!cpumask_subset(new_mask, cpus_allowed)) {
/*
* We must have raced with a concurrent cpuset
* update. Just reset the cpus_allowed to the
* cpuset's cpus_allowed
*/
- new_mask = cpus_allowed;
+ cpumask_copy(new_mask, cpus_allowed);
goto again;
}
}
out_unlock:
+ free_cpumask_var(new_mask);
+ out_free_cpus_allowed:
+ free_cpumask_var(cpus_allowed);
+ out_put_task:
put_task_struct(p);
put_online_cpus();
return retval;
}
static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
- cpumask_t *new_mask)
+ struct cpumask *new_mask)
{
- if (len < sizeof(cpumask_t)) {
- memset(new_mask, 0, sizeof(cpumask_t));
- } else if (len > sizeof(cpumask_t)) {
- len = sizeof(cpumask_t);
- }
+ if (len < cpumask_size())
+ cpumask_clear(new_mask);
+ else if (len > cpumask_size())
+ len = cpumask_size();
+
return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
}
asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
unsigned long __user *user_mask_ptr)
{
- cpumask_t new_mask;
+ cpumask_var_t new_mask;
int retval;
- retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask);
- if (retval)
- return retval;
+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+ return -ENOMEM;
- return sched_setaffinity(pid, &new_mask);
+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
+ if (retval == 0)
+ retval = sched_setaffinity(pid, new_mask);
+ free_cpumask_var(new_mask);
+ return retval;
}
- long sched_getaffinity(pid_t pid, cpumask_t *mask)
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
{
struct task_struct *p;
int retval;
if (retval)
goto out_unlock;
- cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+ cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
out_unlock:
read_unlock(&tasklist_lock);
unsigned long __user *user_mask_ptr)
{
int ret;
- cpumask_t mask;
+ cpumask_var_t mask;
- if (len < sizeof(cpumask_t))
+ if (len < cpumask_size())
return -EINVAL;
- ret = sched_getaffinity(pid, &mask);
- if (ret < 0)
- return ret;
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
- if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t)))
- return -EFAULT;
+ ret = sched_getaffinity(pid, mask);
+ if (ret == 0) {
+ if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+ ret = -EFAULT;
+ else
+ ret = cpumask_size();
+ }
+ free_cpumask_var(mask);
- return sizeof(cpumask_t);
+ return ret;
}
/**
idle->se.exec_start = sched_clock();
idle->prio = idle->normal_prio = MAX_PRIO;
- idle->cpus_allowed = cpumask_of_cpu(cpu);
+ cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
__set_task_cpu(idle, cpu);
rq->curr = rq->idle = idle;
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
+ ftrace_graph_init_task(idle);
}
/*
* indicates which cpus entered this state. This is used
* in the rcu update to wait only for active cpus. For system
* which do not switch off the HZ timer nohz_cpu_mask should
- * always be CPU_MASK_NONE.
+ * always be CPU_BITS_NONE.
*/
- cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+ cpumask_var_t nohz_cpu_mask;
/*
* Increase the granularity value when there are more CPUs,
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
- int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
+ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
struct migration_req req;
unsigned long flags;
int ret = 0;
rq = task_rq_lock(p, &flags);
- if (!cpus_intersects(*new_mask, cpu_online_map)) {
+ if (!cpumask_intersects(new_mask, cpu_online_mask)) {
ret = -EINVAL;
goto out;
}
if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
- !cpus_equal(p->cpus_allowed, *new_mask))) {
+ !cpumask_equal(&p->cpus_allowed, new_mask))) {
ret = -EINVAL;
goto out;
}
if (p->sched_class->set_cpus_allowed)
p->sched_class->set_cpus_allowed(p, new_mask);
else {
- p->cpus_allowed = *new_mask;
- p->rt.nr_cpus_allowed = cpus_weight(*new_mask);
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
}
/* Can the task run on the task's current CPU? If so, we're done */
- if (cpu_isset(task_cpu(p), *new_mask))
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- if (migrate_task(p, any_online_cpu(*new_mask), &req)) {
+ if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, &flags);
wake_up_process(rq->migration_thread);
if (task_cpu(p) != src_cpu)
goto done;
/* Affinity changed (again). */
- if (!cpu_isset(dest_cpu, p->cpus_allowed))
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
goto fail;
on_rq = p->se.on_rq;
/*
* Figure out where task on dead CPU should go, use force if necessary.
*/
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
- unsigned long flags;
- cpumask_t mask;
- struct rq *rq;
int dest_cpu;
+ /* FIXME: Use cpumask_of_node here. */
+ cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
+ const struct cpumask *nodemask = &_nodemask;
+
+ again:
+ /* Look for allowed, online CPU in same node. */
+ for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ goto move;
+
+ /* Any allowed, online CPU? */
+ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+ if (dest_cpu < nr_cpu_ids)
+ goto move;
+
+ /* No more Mr. Nice Guy. */
+ if (dest_cpu >= nr_cpu_ids) {
+ cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+ dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
- do {
- /* On same node? */
- mask = node_to_cpumask(cpu_to_node(dead_cpu));
- cpus_and(mask, mask, p->cpus_allowed);
- dest_cpu = any_online_cpu(mask);
-
- /* On any allowed CPU? */
- if (dest_cpu >= nr_cpu_ids)
- dest_cpu = any_online_cpu(p->cpus_allowed);
-
- /* No more Mr. Nice Guy. */
- if (dest_cpu >= nr_cpu_ids) {
- cpumask_t cpus_allowed;
-
- cpuset_cpus_allowed_locked(p, &cpus_allowed);
- /*
- * Try to stay on the same cpuset, where the
- * current cpuset may be a subset of all cpus.
- * The cpuset_cpus_allowed_locked() variant of
- * cpuset_cpus_allowed() will not block. It must be
- * called within calls to cpuset_lock/cpuset_unlock.
- */
- rq = task_rq_lock(p, &flags);
- p->cpus_allowed = cpus_allowed;
- dest_cpu = any_online_cpu(p->cpus_allowed);
- task_rq_unlock(rq, &flags);
-
- /*
- * Don't tell them about moving exiting tasks or
- * kernel threads (both mm NULL), since they never
- * leave kernel.
- */
- if (p->mm && printk_ratelimit()) {
- printk(KERN_INFO "process %d (%s) no "
- "longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, dead_cpu);
- }
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, dead_cpu);
}
- } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
+ }
+
+ move:
+ /* It can have affinity changed while we were choosing. */
+ if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+ goto again;
}
/*
*/
static void migrate_nr_uninterruptible(struct rq *rq_src)
{
- struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR));
+ struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
unsigned long flags;
local_irq_save(flags);
if (!rq->online) {
const struct sched_class *class;
- cpu_set(rq->cpu, rq->rd->online);
+ cpumask_set_cpu(rq->cpu, rq->rd->online);
rq->online = 1;
for_each_class(class) {
class->rq_offline(rq);
}
- cpu_clear(rq->cpu, rq->rd->online);
+ cpumask_clear_cpu(rq->cpu, rq->rd->online);
rq->online = 0;
}
}
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
- BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_online(rq);
}
break;
/* Unbind it from offline cpu so it can run. Fall thru. */
kthread_bind(cpu_rq(cpu)->migration_thread,
- any_online_cpu(cpu_online_map));
+ cpumask_any(cpu_online_mask));
kthread_stop(cpu_rq(cpu)->migration_thread);
cpu_rq(cpu)->migration_thread = NULL;
break;
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
- BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
spin_unlock_irqrestore(&rq->lock, flags);
#ifdef CONFIG_SCHED_DEBUG
- static inline const char *sd_level_to_string(enum sched_domain_level lvl)
- {
- switch (lvl) {
- case SD_LV_NONE:
- return "NONE";
- case SD_LV_SIBLING:
- return "SIBLING";
- case SD_LV_MC:
- return "MC";
- case SD_LV_CPU:
- return "CPU";
- case SD_LV_NODE:
- return "NODE";
- case SD_LV_ALLNODES:
- return "ALLNODES";
- case SD_LV_MAX:
- return "MAX";
-
- }
- return "MAX";
- }
-
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
- cpumask_t *groupmask)
+ struct cpumask *groupmask)
{
struct sched_group *group = sd->groups;
char str[256];
- cpulist_scnprintf(str, sizeof(str), &sd->span);
- cpus_clear(*groupmask);
- cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
++ cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
+ cpumask_clear(groupmask);
printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
return -1;
}
- printk(KERN_CONT "span %s level %s\n",
- str, sd_level_to_string(sd->level));
+ printk(KERN_CONT "span %s level %s\n", str, sd->name);
- if (!cpu_isset(cpu, sd->span)) {
+ if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
printk(KERN_ERR "ERROR: domain->span does not contain "
"CPU%d\n", cpu);
}
- if (!cpu_isset(cpu, group->cpumask)) {
+ if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
printk(KERN_ERR "ERROR: domain->groups does not contain"
" CPU%d\n", cpu);
}
break;
}
- if (!cpus_weight(group->cpumask)) {
+ if (!cpumask_weight(sched_group_cpus(group))) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: empty group\n");
break;
}
- if (cpus_intersects(*groupmask, group->cpumask)) {
+ if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
break;
}
- cpus_or(*groupmask, *groupmask, group->cpumask);
+ cpumask_or(groupmask, groupmask, sched_group_cpus(group));
- cpulist_scnprintf(str, sizeof(str), &group->cpumask);
- cpulist_scnprintf(str, sizeof(str), *sched_group_cpus(group));
++ cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
printk(KERN_CONT " %s", str);
group = group->next;
} while (group != sd->groups);
printk(KERN_CONT "\n");
- if (!cpus_equal(sd->span, *groupmask))
+ if (!cpumask_equal(sched_domain_span(sd), groupmask))
printk(KERN_ERR "ERROR: groups don't span domain->span\n");
- if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+ if (sd->parent &&
+ !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
printk(KERN_ERR "ERROR: parent span is not a superset "
"of domain->span\n");
return 0;
static void sched_domain_debug(struct sched_domain *sd, int cpu)
{
- cpumask_t *groupmask;
+ cpumask_var_t groupmask;
int level = 0;
if (!sd) {
printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
- groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
- if (!groupmask) {
+ if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
return;
}
if (!sd)
break;
}
- kfree(groupmask);
+ free_cpumask_var(groupmask);
}
#else /* !CONFIG_SCHED_DEBUG */
# define sched_domain_debug(sd, cpu) do { } while (0)
static int sd_degenerate(struct sched_domain *sd)
{
- if (cpus_weight(sd->span) == 1)
+ if (cpumask_weight(sched_domain_span(sd)) == 1)
return 1;
/* Following flags need at least 2 groups */
if (sd_degenerate(parent))
return 1;
- if (!cpus_equal(sd->span, parent->span))
+ if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
return 0;
/* Does parent contain flags not in child? */
SD_BALANCE_EXEC |
SD_SHARE_CPUPOWER |
SD_SHARE_PKG_RESOURCES);
+ if (nr_node_ids == 1)
+ pflags &= ~SD_SERIALIZE;
}
if (~cflags & pflags)
return 0;
return 1;
}
+ static void free_rootdomain(struct root_domain *rd)
+ {
+ cpupri_cleanup(&rd->cpupri);
+
+ free_cpumask_var(rd->rto_mask);
+ free_cpumask_var(rd->online);
+ free_cpumask_var(rd->span);
+ kfree(rd);
+ }
+
static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
unsigned long flags;
if (rq->rd) {
struct root_domain *old_rd = rq->rd;
- if (cpu_isset(rq->cpu, old_rd->online))
+ if (cpumask_test_cpu(rq->cpu, old_rd->online))
set_rq_offline(rq);
- cpu_clear(rq->cpu, old_rd->span);
+ cpumask_clear_cpu(rq->cpu, old_rd->span);
if (atomic_dec_and_test(&old_rd->refcount))
- kfree(old_rd);
+ free_rootdomain(old_rd);
}
atomic_inc(&rd->refcount);
rq->rd = rd;
- cpu_set(rq->cpu, rd->span);
- if (cpu_isset(rq->cpu, cpu_online_map))
+ cpumask_set_cpu(rq->cpu, rd->span);
+ if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
set_rq_online(rq);
spin_unlock_irqrestore(&rq->lock, flags);
}
- static void init_rootdomain(struct root_domain *rd)
+ static int init_rootdomain(struct root_domain *rd, bool bootmem)
{
memset(rd, 0, sizeof(*rd));
- cpus_clear(rd->span);
- cpus_clear(rd->online);
+ if (bootmem) {
+ alloc_bootmem_cpumask_var(&def_root_domain.span);
+ alloc_bootmem_cpumask_var(&def_root_domain.online);
+ alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
+ cpupri_init(&rd->cpupri, true);
+ return 0;
+ }
+
+ if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+ goto free_rd;
+ if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+ goto free_span;
+ if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+ goto free_online;
+
+ if (cpupri_init(&rd->cpupri, false) != 0)
+ goto free_rto_mask;
+ return 0;
- cpupri_init(&rd->cpupri);
+ free_rto_mask:
+ free_cpumask_var(rd->rto_mask);
+ free_online:
+ free_cpumask_var(rd->online);
+ free_span:
+ free_cpumask_var(rd->span);
+ free_rd:
+ kfree(rd);
+ return -ENOMEM;
}
static void init_defrootdomain(void)
{
- init_rootdomain(&def_root_domain);
+ init_rootdomain(&def_root_domain, true);
+
atomic_set(&def_root_domain.refcount, 1);
}
if (!rd)
return NULL;
- init_rootdomain(rd);
+ if (init_rootdomain(rd, false) != 0) {
+ kfree(rd);
+ return NULL;
+ }
return rd;
}
}
/* cpus with isolated domains */
- static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
+ static cpumask_var_t cpu_isolated_map;
/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
- static int __initdata ints[NR_CPUS];
- int i;
-
- str = get_options(str, ARRAY_SIZE(ints), ints);
- cpus_clear(cpu_isolated_map);
- for (i = 1; i <= ints[0]; i++)
- if (ints[i] < NR_CPUS)
- cpu_set(ints[i], cpu_isolated_map);
- cpulist_parse(str, *cpu_isolated_map);
++ cpulist_parse(str, cpu_isolated_map);
return 1;
}
/*
* init_sched_build_groups takes the cpumask we wish to span, and a pointer
* to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
- * (due to the fact that we keep track of groups covered with a cpumask_t).
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
*
* init_sched_build_groups will build a circular linked list of the groups
* covered by the given span, and will set each group's ->cpumask correctly,
* and ->cpu_power to 0.
*/
static void
- init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
- int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+ init_sched_build_groups(const struct cpumask *span,
+ const struct cpumask *cpu_map,
+ int (*group_fn)(int cpu, const struct cpumask *cpu_map,
struct sched_group **sg,
- cpumask_t *tmpmask),
- cpumask_t *covered, cpumask_t *tmpmask)
+ struct cpumask *tmpmask),
+ struct cpumask *covered, struct cpumask *tmpmask)
{
struct sched_group *first = NULL, *last = NULL;
int i;
- cpus_clear(*covered);
+ cpumask_clear(covered);
- for_each_cpu_mask_nr(i, *span) {
+ for_each_cpu(i, span) {
struct sched_group *sg;
int group = group_fn(i, cpu_map, &sg, tmpmask);
int j;
- if (cpu_isset(i, *covered))
+ if (cpumask_test_cpu(i, covered))
continue;
- cpus_clear(sg->cpumask);
+ cpumask_clear(sched_group_cpus(sg));
sg->__cpu_power = 0;
- for_each_cpu_mask_nr(j, *span) {
+ for_each_cpu(j, span) {
if (group_fn(j, cpu_map, NULL, tmpmask) != group)
continue;
- cpu_set(j, *covered);
- cpu_set(j, sg->cpumask);
+ cpumask_set_cpu(j, covered);
+ cpumask_set_cpu(j, sched_group_cpus(sg));
}
if (!first)
first = sg;
* should be one that prevents unnecessary balancing, but also spreads tasks
* out optimally.
*/
- static void sched_domain_node_span(int node, cpumask_t *span)
+ static void sched_domain_node_span(int node, struct cpumask *span)
{
nodemask_t used_nodes;
+ /* FIXME: use cpumask_of_node() */
node_to_cpumask_ptr(nodemask, node);
int i;
int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
/*
+ * The cpus mask in sched_group and sched_domain hangs off the end.
+ * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
+ * for nr_cpu_ids < CONFIG_NR_CPUS.
+ */
+ struct static_sched_group {
+ struct sched_group sg;
+ DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
+ };
+
+ struct static_sched_domain {
+ struct sched_domain sd;
+ DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+ };
+
+ /*
* SMT sched-domains:
*/
#ifdef CONFIG_SCHED_SMT
- static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
+ static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
static int
- cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
- cpumask_t *unused)
+ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg, struct cpumask *unused)
{
if (sg)
- *sg = &per_cpu(sched_group_cpus, cpu);
+ *sg = &per_cpu(sched_group_cpus, cpu).sg;
return cpu;
}
#endif /* CONFIG_SCHED_SMT */
* multi-core sched-domains:
*/
#ifdef CONFIG_SCHED_MC
- static DEFINE_PER_CPU(struct sched_domain, core_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_core);
+ static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
#endif /* CONFIG_SCHED_MC */
#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
static int
- cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
- cpumask_t *mask)
+ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg, struct cpumask *mask)
{
int group;
- *mask = per_cpu(cpu_sibling_map, cpu);
- cpus_and(*mask, *mask, *cpu_map);
- group = first_cpu(*mask);
+ cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ group = cpumask_first(mask);
if (sg)
- *sg = &per_cpu(sched_group_core, group);
+ *sg = &per_cpu(sched_group_core, group).sg;
return group;
}
#elif defined(CONFIG_SCHED_MC)
static int
- cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
- cpumask_t *unused)
+ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg, struct cpumask *unused)
{
if (sg)
- *sg = &per_cpu(sched_group_core, cpu);
+ *sg = &per_cpu(sched_group_core, cpu).sg;
return cpu;
}
#endif
- static DEFINE_PER_CPU(struct sched_domain, phys_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
+ static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
static int
- cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
- cpumask_t *mask)
+ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg, struct cpumask *mask)
{
int group;
#ifdef CONFIG_SCHED_MC
+ /* FIXME: Use cpu_coregroup_mask. */
*mask = cpu_coregroup_map(cpu);
cpus_and(*mask, *mask, *cpu_map);
- group = first_cpu(*mask);
+ group = cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
- *mask = per_cpu(cpu_sibling_map, cpu);
- cpus_and(*mask, *mask, *cpu_map);
- group = first_cpu(*mask);
+ cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ group = cpumask_first(mask);
#else
group = cpu;
#endif
if (sg)
- *sg = &per_cpu(sched_group_phys, group);
+ *sg = &per_cpu(sched_group_phys, group).sg;
return group;
}
static struct sched_group ***sched_group_nodes_bycpu;
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
- static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
- struct sched_group **sg, cpumask_t *nodemask)
+ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg,
+ struct cpumask *nodemask)
{
int group;
+ /* FIXME: use cpumask_of_node */
+ node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
- *nodemask = node_to_cpumask(cpu_to_node(cpu));
- cpus_and(*nodemask, *nodemask, *cpu_map);
- group = first_cpu(*nodemask);
+ cpumask_and(nodemask, pnodemask, cpu_map);
+ group = cpumask_first(nodemask);
if (sg)
- *sg = &per_cpu(sched_group_allnodes, group);
+ *sg = &per_cpu(sched_group_allnodes, group).sg;
return group;
}
if (!sg)
return;
do {
- for_each_cpu_mask_nr(j, sg->cpumask) {
+ for_each_cpu(j, sched_group_cpus(sg)) {
struct sched_domain *sd;
- sd = &per_cpu(phys_domains, j);
- if (j != first_cpu(sd->groups->cpumask)) {
+ sd = &per_cpu(phys_domains, j).sd;
+ if (j != cpumask_first(sched_group_cpus(sd->groups))) {
/*
* Only add "power" once for each
* physical package.
#ifdef CONFIG_NUMA
/* Free memory allocated for various sched_group structures */
- static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+ static void free_sched_groups(const struct cpumask *cpu_map,
+ struct cpumask *nodemask)
{
int cpu, i;
- for_each_cpu_mask_nr(cpu, *cpu_map) {
+ for_each_cpu(cpu, cpu_map) {
struct sched_group **sched_group_nodes
= sched_group_nodes_bycpu[cpu];
for (i = 0; i < nr_node_ids; i++) {
struct sched_group *oldsg, *sg = sched_group_nodes[i];
+ /* FIXME: Use cpumask_of_node */
+ node_to_cpumask_ptr(pnodemask, i);
- *nodemask = node_to_cpumask(i);
- cpus_and(*nodemask, *nodemask, *cpu_map);
- if (cpus_empty(*nodemask))
+ cpus_and(*nodemask, *pnodemask, *cpu_map);
+ if (cpumask_empty(nodemask))
continue;
if (sg == NULL)
}
}
#else /* !CONFIG_NUMA */
- static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+ static void free_sched_groups(const struct cpumask *cpu_map,
+ struct cpumask *nodemask)
{
}
#endif /* CONFIG_NUMA */
WARN_ON(!sd || !sd->groups);
- if (cpu != first_cpu(sd->groups->cpumask))
+ if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
return;
child = sd->child;
SD_INIT_FUNC(MC)
#endif
- /*
- * To minimize stack usage kmalloc room for cpumasks and share the
- * space as the usage in build_sched_domains() dictates. Used only
- * if the amount of space is significant.
- */
- struct allmasks {
- cpumask_t tmpmask; /* make this one first */
- union {
- cpumask_t nodemask;
- cpumask_t this_sibling_map;
- cpumask_t this_core_map;
- };
- cpumask_t send_covered;
-
- #ifdef CONFIG_NUMA
- cpumask_t domainspan;
- cpumask_t covered;
- cpumask_t notcovered;
- #endif
- };
-
- #if NR_CPUS > 128
- #define SCHED_CPUMASK_ALLOC 1
- #define SCHED_CPUMASK_FREE(v) kfree(v)
- #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
- #else
- #define SCHED_CPUMASK_ALLOC 0
- #define SCHED_CPUMASK_FREE(v)
- #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
- #endif
-
- #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
- ((unsigned long)(a) + offsetof(struct allmasks, v))
-
static int default_relax_domain_level = -1;
static int __init setup_relax_domain_level(char *str)
* Build sched domains for a given set of cpus and attach the sched domains
* to the individual cpus
*/
- static int __build_sched_domains(const cpumask_t *cpu_map,
+ static int __build_sched_domains(const struct cpumask *cpu_map,
struct sched_domain_attr *attr)
{
- int i;
+ int i, err = -ENOMEM;
struct root_domain *rd;
- SCHED_CPUMASK_DECLARE(allmasks);
- cpumask_t *tmpmask;
+ cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
+ tmpmask;
#ifdef CONFIG_NUMA
+ cpumask_var_t domainspan, covered, notcovered;
struct sched_group **sched_group_nodes = NULL;
int sd_allnodes = 0;
+ if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
+ goto out;
+ if (!alloc_cpumask_var(&covered, GFP_KERNEL))
+ goto free_domainspan;
+ if (!alloc_cpumask_var(¬covered, GFP_KERNEL))
+ goto free_covered;
+ #endif
+
+ if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
+ goto free_notcovered;
+ if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
+ goto free_nodemask;
+ if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
+ goto free_this_sibling_map;
+ if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
+ goto free_this_core_map;
+ if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ goto free_send_covered;
+
+ #ifdef CONFIG_NUMA
/*
* Allocate the per-node list of sched groups
*/
GFP_KERNEL);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
- return -ENOMEM;
+ goto free_tmpmask;
}
#endif
rd = alloc_rootdomain();
if (!rd) {
printk(KERN_WARNING "Cannot alloc root domain\n");
- #ifdef CONFIG_NUMA
- kfree(sched_group_nodes);
- #endif
- return -ENOMEM;
+ goto free_sched_groups;
}
- #if SCHED_CPUMASK_ALLOC
- /* get space for all scratch cpumask variables */
- allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
- if (!allmasks) {
- printk(KERN_WARNING "Cannot alloc cpumask array\n");
- kfree(rd);
#ifdef CONFIG_NUMA
- kfree(sched_group_nodes);
- #endif
- return -ENOMEM;
- }
- #endif
- tmpmask = (cpumask_t *)allmasks;
-
-
- #ifdef CONFIG_NUMA
- sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+ sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
#endif
/*
* Set up domains for cpus specified by the cpu_map.
*/
- for_each_cpu_mask_nr(i, *cpu_map) {
+ for_each_cpu(i, cpu_map) {
struct sched_domain *sd = NULL, *p;
- SCHED_CPUMASK_VAR(nodemask, allmasks);
+ /* FIXME: use cpumask_of_node */
*nodemask = node_to_cpumask(cpu_to_node(i));
cpus_and(*nodemask, *nodemask, *cpu_map);
#ifdef CONFIG_NUMA
- if (cpus_weight(*cpu_map) >
- SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
+ if (cpumask_weight(cpu_map) >
+ SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
sd = &per_cpu(allnodes_domains, i);
SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
- sd->span = *cpu_map;
+ cpumask_copy(sched_domain_span(sd), cpu_map);
cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
p = sd;
sd_allnodes = 1;
sd = &per_cpu(node_domains, i);
SD_INIT(sd, NODE);
set_domain_attribute(sd, attr);
- sched_domain_node_span(cpu_to_node(i), &sd->span);
+ sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
sd->parent = p;
if (p)
p->child = sd;
- cpus_and(sd->span, sd->span, *cpu_map);
+ cpumask_and(sched_domain_span(sd),
+ sched_domain_span(sd), cpu_map);
#endif
p = sd;
- sd = &per_cpu(phys_domains, i);
+ sd = &per_cpu(phys_domains, i).sd;
SD_INIT(sd, CPU);
set_domain_attribute(sd, attr);
- sd->span = *nodemask;
+ cpumask_copy(sched_domain_span(sd), nodemask);
sd->parent = p;
if (p)
p->child = sd;
#ifdef CONFIG_SCHED_MC
p = sd;
- sd = &per_cpu(core_domains, i);
+ sd = &per_cpu(core_domains, i).sd;
SD_INIT(sd, MC);
set_domain_attribute(sd, attr);
- sd->span = cpu_coregroup_map(i);
- cpus_and(sd->span, sd->span, *cpu_map);
+ *sched_domain_span(sd) = cpu_coregroup_map(i);
+ cpumask_and(sched_domain_span(sd),
+ sched_domain_span(sd), cpu_map);
sd->parent = p;
p->child = sd;
cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
#ifdef CONFIG_SCHED_SMT
p = sd;
- sd = &per_cpu(cpu_domains, i);
+ sd = &per_cpu(cpu_domains, i).sd;
SD_INIT(sd, SIBLING);
set_domain_attribute(sd, attr);
- sd->span = per_cpu(cpu_sibling_map, i);
- cpus_and(sd->span, sd->span, *cpu_map);
+ cpumask_and(sched_domain_span(sd),
+ &per_cpu(cpu_sibling_map, i), cpu_map);
sd->parent = p;
p->child = sd;
cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
- for_each_cpu_mask_nr(i, *cpu_map) {
- SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
- SCHED_CPUMASK_VAR(send_covered, allmasks);
-
- *this_sibling_map = per_cpu(cpu_sibling_map, i);
- cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
- if (i != first_cpu(*this_sibling_map))
+ for_each_cpu(i, cpu_map) {
+ cpumask_and(this_sibling_map,
+ &per_cpu(cpu_sibling_map, i), cpu_map);
+ if (i != cpumask_first(this_sibling_map))
continue;
init_sched_build_groups(this_sibling_map, cpu_map,
#ifdef CONFIG_SCHED_MC
/* Set up multi-core groups */
- for_each_cpu_mask_nr(i, *cpu_map) {
- SCHED_CPUMASK_VAR(this_core_map, allmasks);
- SCHED_CPUMASK_VAR(send_covered, allmasks);
-
+ for_each_cpu(i, cpu_map) {
+ /* FIXME: Use cpu_coregroup_mask */
*this_core_map = cpu_coregroup_map(i);
cpus_and(*this_core_map, *this_core_map, *cpu_map);
- if (i != first_cpu(*this_core_map))
+ if (i != cpumask_first(this_core_map))
continue;
init_sched_build_groups(this_core_map, cpu_map,
/* Set up physical groups */
for (i = 0; i < nr_node_ids; i++) {
- SCHED_CPUMASK_VAR(nodemask, allmasks);
- SCHED_CPUMASK_VAR(send_covered, allmasks);
-
+ /* FIXME: Use cpumask_of_node */
*nodemask = node_to_cpumask(i);
cpus_and(*nodemask, *nodemask, *cpu_map);
- if (cpus_empty(*nodemask))
+ if (cpumask_empty(nodemask))
continue;
init_sched_build_groups(nodemask, cpu_map,
#ifdef CONFIG_NUMA
/* Set up node groups */
if (sd_allnodes) {
- SCHED_CPUMASK_VAR(send_covered, allmasks);
-
init_sched_build_groups(cpu_map, cpu_map,
&cpu_to_allnodes_group,
send_covered, tmpmask);
for (i = 0; i < nr_node_ids; i++) {
/* Set up node groups */
struct sched_group *sg, *prev;
- SCHED_CPUMASK_VAR(nodemask, allmasks);
- SCHED_CPUMASK_VAR(domainspan, allmasks);
- SCHED_CPUMASK_VAR(covered, allmasks);
int j;
+ /* FIXME: Use cpumask_of_node */
*nodemask = node_to_cpumask(i);
- cpus_clear(*covered);
+ cpumask_clear(covered);
cpus_and(*nodemask, *nodemask, *cpu_map);
- if (cpus_empty(*nodemask)) {
+ if (cpumask_empty(nodemask)) {
sched_group_nodes[i] = NULL;
continue;
}
sched_domain_node_span(i, domainspan);
- cpus_and(*domainspan, *domainspan, *cpu_map);
+ cpumask_and(domainspan, domainspan, cpu_map);
- sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
+ sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ GFP_KERNEL, i);
if (!sg) {
printk(KERN_WARNING "Can not alloc domain group for "
"node %d\n", i);
goto error;
}
sched_group_nodes[i] = sg;
- for_each_cpu_mask_nr(j, *nodemask) {
+ for_each_cpu(j, nodemask) {
struct sched_domain *sd;
sd = &per_cpu(node_domains, j);
sd->groups = sg;
}
sg->__cpu_power = 0;
- sg->cpumask = *nodemask;
+ cpumask_copy(sched_group_cpus(sg), nodemask);
sg->next = sg;
- cpus_or(*covered, *covered, *nodemask);
+ cpumask_or(covered, covered, nodemask);
prev = sg;
for (j = 0; j < nr_node_ids; j++) {
- SCHED_CPUMASK_VAR(notcovered, allmasks);
int n = (i + j) % nr_node_ids;
+ /* FIXME: Use cpumask_of_node */
node_to_cpumask_ptr(pnodemask, n);
- cpus_complement(*notcovered, *covered);
- cpus_and(*tmpmask, *notcovered, *cpu_map);
- cpus_and(*tmpmask, *tmpmask, *domainspan);
- if (cpus_empty(*tmpmask))
+ cpumask_complement(notcovered, covered);
+ cpumask_and(tmpmask, notcovered, cpu_map);
+ cpumask_and(tmpmask, tmpmask, domainspan);
+ if (cpumask_empty(tmpmask))
break;
- cpus_and(*tmpmask, *tmpmask, *pnodemask);
- if (cpus_empty(*tmpmask))
+ cpumask_and(tmpmask, tmpmask, pnodemask);
+ if (cpumask_empty(tmpmask))
continue;
- sg = kmalloc_node(sizeof(struct sched_group),
+ sg = kmalloc_node(sizeof(struct sched_group) +
+ cpumask_size(),
GFP_KERNEL, i);
if (!sg) {
printk(KERN_WARNING
goto error;
}
sg->__cpu_power = 0;
- sg->cpumask = *tmpmask;
+ cpumask_copy(sched_group_cpus(sg), tmpmask);
sg->next = prev->next;
- cpus_or(*covered, *covered, *tmpmask);
+ cpumask_or(covered, covered, tmpmask);
prev->next = sg;
prev = sg;
}
/* Calculate CPU power for physical packages and nodes */
#ifdef CONFIG_SCHED_SMT
- for_each_cpu_mask_nr(i, *cpu_map) {
- struct sched_domain *sd = &per_cpu(cpu_domains, i);
+ for_each_cpu(i, cpu_map) {
+ struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
init_sched_groups_power(i, sd);
}
#endif
#ifdef CONFIG_SCHED_MC
- for_each_cpu_mask_nr(i, *cpu_map) {
- struct sched_domain *sd = &per_cpu(core_domains, i);
+ for_each_cpu(i, cpu_map) {
+ struct sched_domain *sd = &per_cpu(core_domains, i).sd;
init_sched_groups_power(i, sd);
}
#endif
- for_each_cpu_mask_nr(i, *cpu_map) {
- struct sched_domain *sd = &per_cpu(phys_domains, i);
+ for_each_cpu(i, cpu_map) {
+ struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
init_sched_groups_power(i, sd);
}
if (sd_allnodes) {
struct sched_group *sg;
- cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
+ cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
tmpmask);
init_numa_sched_groups_power(sg);
}
#endif
/* Attach the domains */
- for_each_cpu_mask_nr(i, *cpu_map) {
+ for_each_cpu(i, cpu_map) {
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
- sd = &per_cpu(cpu_domains, i);
+ sd = &per_cpu(cpu_domains, i).sd;
#elif defined(CONFIG_SCHED_MC)
- sd = &per_cpu(core_domains, i);
+ sd = &per_cpu(core_domains, i).sd;
#else
- sd = &per_cpu(phys_domains, i);
+ sd = &per_cpu(phys_domains, i).sd;
#endif
cpu_attach_domain(sd, rd, i);
}
- SCHED_CPUMASK_FREE((void *)allmasks);
- return 0;
+ err = 0;
+
+ free_tmpmask:
+ free_cpumask_var(tmpmask);
+ free_send_covered:
+ free_cpumask_var(send_covered);
+ free_this_core_map:
+ free_cpumask_var(this_core_map);
+ free_this_sibling_map:
+ free_cpumask_var(this_sibling_map);
+ free_nodemask:
+ free_cpumask_var(nodemask);
+ free_notcovered:
+ #ifdef CONFIG_NUMA
+ free_cpumask_var(notcovered);
+ free_covered:
+ free_cpumask_var(covered);
+ free_domainspan:
+ free_cpumask_var(domainspan);
+ out:
+ #endif
+ return err;
+
+ free_sched_groups:
+ #ifdef CONFIG_NUMA
+ kfree(sched_group_nodes);
+ #endif
+ goto free_tmpmask;
#ifdef CONFIG_NUMA
error:
free_sched_groups(cpu_map, tmpmask);
- SCHED_CPUMASK_FREE((void *)allmasks);
- kfree(rd);
- return -ENOMEM;
+ free_rootdomain(rd);
+ goto free_tmpmask;
#endif
}
- static int build_sched_domains(const cpumask_t *cpu_map)
+ static int build_sched_domains(const struct cpumask *cpu_map)
{
return __build_sched_domains(cpu_map, NULL);
}
- static cpumask_t *doms_cur; /* current sched domains */
+ static struct cpumask *doms_cur; /* current sched domains */
static int ndoms_cur; /* number of sched domains in 'doms_cur' */
static struct sched_domain_attr *dattr_cur;
/* attribues of custom domains in 'doms_cur' */
/*
* Special case: If a kmalloc of a doms_cur partition (array of
- * cpumask_t) fails, then fallback to a single sched domain,
- * as determined by the single cpumask_t fallback_doms.
+ * cpumask) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask fallback_doms.
*/
- static cpumask_t fallback_doms;
+ static cpumask_var_t fallback_doms;
- void __attribute__((weak)) arch_update_cpu_topology(void)
+ /*
+ * arch_update_cpu_topology lets virtualized architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+ int __attribute__((weak)) arch_update_cpu_topology(void)
{
+ return 0;
}
/*
* For now this just excludes isolated cpus, but could be used to
* exclude other special cases in the future.
*/
- static int arch_init_sched_domains(const cpumask_t *cpu_map)
+ static int arch_init_sched_domains(const struct cpumask *cpu_map)
{
int err;
arch_update_cpu_topology();
ndoms_cur = 1;
- doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
if (!doms_cur)
- doms_cur = &fallback_doms;
- cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
+ doms_cur = fallback_doms;
+ cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
dattr_cur = NULL;
err = build_sched_domains(doms_cur);
register_sched_domain_sysctl();
return err;
}
- static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
- cpumask_t *tmpmask)
+ static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
+ struct cpumask *tmpmask)
{
free_sched_groups(cpu_map, tmpmask);
}
* Detach sched domains from a group of cpus specified in cpu_map
* These cpus will now be attached to the NULL domain
*/
- static void detach_destroy_domains(const cpumask_t *cpu_map)
+ static void detach_destroy_domains(const struct cpumask *cpu_map)
{
- cpumask_t tmpmask;
+ /* Save because hotplug lock held. */
+ static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
int i;
- unregister_sched_domain_sysctl();
-
- for_each_cpu_mask_nr(i, *cpu_map)
+ for_each_cpu(i, cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
- arch_destroy_sched_domains(cpu_map, &tmpmask);
+ arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
}
/* handle null as "default" */
* doms_new[] to the current sched domain partitioning, doms_cur[].
* It destroys each deleted domain and builds each new domain.
*
- * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
* The masks don't intersect (don't overlap.) We should setup one
* sched domain for each mask. CPUs not in any of the cpumasks will
* not be load balanced. If the same cpumask appears both in the
* the single partition 'fallback_doms', it also forces the domains
* to be rebuilt.
*
- * If doms_new == NULL it will be replaced with cpu_online_map.
+ * If doms_new == NULL it will be replaced with cpu_online_mask.
* ndoms_new == 0 is a special case for destroying existing domains,
* and it will not create the default domain.
*
* Call with hotplug lock held
*/
- void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+ /* FIXME: Change to struct cpumask *doms_new[] */
+ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
struct sched_domain_attr *dattr_new)
{
int i, j, n;
+ int new_topology;
mutex_lock(&sched_domains_mutex);
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
+ /* Let architecture update cpu core mappings. */
+ new_topology = arch_update_cpu_topology();
+
n = doms_new ? ndoms_new : 0;
/* Destroy deleted domains */
for (i = 0; i < ndoms_cur; i++) {
- for (j = 0; j < n; j++) {
- if (cpus_equal(doms_cur[i], doms_new[j])
+ for (j = 0; j < n && !new_topology; j++) {
+ if (cpumask_equal(&doms_cur[i], &doms_new[j])
&& dattrs_equal(dattr_cur, i, dattr_new, j))
goto match1;
}
if (doms_new == NULL) {
ndoms_cur = 0;
- doms_new = &fallback_doms;
- cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
- dattr_new = NULL;
+ doms_new = fallback_doms;
+ cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
+ WARN_ON_ONCE(dattr_new);
}
/* Build new domains */
for (i = 0; i < ndoms_new; i++) {
- for (j = 0; j < ndoms_cur; j++) {
- if (cpus_equal(doms_new[i], doms_cur[j])
+ for (j = 0; j < ndoms_cur && !new_topology; j++) {
+ if (cpumask_equal(&doms_new[i], &doms_cur[j])
&& dattrs_equal(dattr_new, i, dattr_cur, j))
goto match2;
}
}
/* Remember the new sched domains */
- if (doms_cur != &fallback_doms)
+ if (doms_cur != fallback_doms)
kfree(doms_cur);
kfree(dattr_cur); /* kfree(NULL) is safe */
doms_cur = doms_new;
void __init sched_init_smp(void)
{
- cpumask_t non_isolated_cpus;
+ cpumask_var_t non_isolated_cpus;
+
+ alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
#if defined(CONFIG_NUMA)
sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
#endif
get_online_cpus();
mutex_lock(&sched_domains_mutex);
- arch_init_sched_domains(&cpu_online_map);
- cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
- if (cpus_empty(non_isolated_cpus))
- cpu_set(smp_processor_id(), non_isolated_cpus);
+ arch_init_sched_domains(cpu_online_mask);
+ cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+ if (cpumask_empty(non_isolated_cpus))
+ cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
init_hrtick();
/* Move init over to a non-isolated CPU */
- if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
+ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
BUG();
sched_init_granularity();
+ free_cpumask_var(non_isolated_cpus);
+
+ alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+ init_sched_rt_class();
}
#else
void __init sched_init_smp(void)
*/
current->sched_class = &fair_sched_class;
+ /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
+ alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+ #ifdef CONFIG_SMP
+ #ifdef CONFIG_NO_HZ
+ alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+ #endif
+ alloc_bootmem_cpumask_var(&cpu_isolated_map);
+ #endif /* SMP */
+
scheduler_running = 1;
}
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct cfs_rq *cfs_rq;
- struct sched_entity *se, *parent_se;
+ struct sched_entity *se;
struct rq *rq;
int i;
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
goto err;
- se = kmalloc_node(sizeof(struct sched_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ se = kzalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!se)
goto err;
- parent_se = parent ? parent->se[i] : NULL;
- init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+ init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
}
return 1;
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
struct rt_rq *rt_rq;
- struct sched_rt_entity *rt_se, *parent_se;
+ struct sched_rt_entity *rt_se;
struct rq *rq;
int i;
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- rt_rq = kmalloc_node(sizeof(struct rt_rq),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_rq = kzalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_rq)
goto err;
- rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
- GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL, cpu_to_node(i));
if (!rt_se)
goto err;
- parent_se = parent ? parent->rt_se[i] : NULL;
- init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+ init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
}
return 1;
* (balbir@in.ibm.com).
*/
- /* track cpu usage of a group of tasks */
+ /* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct cpuacct *parent;
};
struct cgroup_subsys cpuacct_subsys;
return ERR_PTR(-ENOMEM);
}
+ if (cgrp->parent)
+ ca->parent = cgroup_ca(cgrp->parent);
+
return &ca->css;
}
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int cpu;
if (!cpuacct_subsys.active)
return;
+ cpu = task_cpu(tsk);
ca = task_ca(tsk);
- if (ca) {
- u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
+ for (; ca; ca = ca->parent) {
+ u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
- cpumask_scnprintf(mask_str, mask_len, &sd->span);
+ cpumask_scnprintf(mask_str, mask_len,
- *sched_domain_span(sd));
++ sched_domain_span(sd));
seq_printf(seq, "domain%d %s", dcount++, mask_str);
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
itype++) {
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/kprobes.h>
+ #include <linux/seq_file.h>
#include <linux/writeback.h>
#include <linux/stacktrace.h>
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
+ /*
+ * We need to change this state when a selftest is running.
+ * A selftest will lurk into the ring-buffer to count the
+ * entries inserted during the selftest although some concurrent
+ * insertions into the ring-buffer such as ftrace_printk could occurred
+ * at the same time, giving false positive or negative results.
+ */
+ static bool __read_mostly tracing_selftest_running;
+
+ /* For tracers that don't implement custom flags */
+ static struct tracer_opt dummy_tracer_opt[] = {
+ { }
+ };
+
+ static struct tracer_flags dummy_tracer_flags = {
+ .val = 0,
+ .opts = dummy_tracer_opt
+ };
+
+ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
+ {
+ return 0;
+ }
+
+ /*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+ int tracing_disabled = 1;
+
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
- static int tracing_disabled = 1;
+ /*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+ int ftrace_dump_on_oops;
+
+ static int tracing_set_tracer(char *buf);
+
+ static int __init set_ftrace(char *str)
+ {
+ tracing_set_tracer(str);
+ return 1;
+ }
+ __setup("ftrace", set_ftrace);
+
+ static int __init set_ftrace_dump_on_oops(char *str)
+ {
+ ftrace_dump_on_oops = 1;
+ return 1;
+ }
+ __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
long
ns2usecs(cycle_t nsec)
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+ /**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if it should enable their features when starting
+ * up. See irqsoff tracer for an example (start_irqsoff_tracer).
+ */
+ int tracing_is_enabled(void)
+ {
+ return tracer_enabled;
+ }
+
/* function tracing enabled */
int ftrace_function_enabled;
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
- /* trace_flags holds iter_ctrl options */
- unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+ /* trace_flags holds trace_options default values */
+ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
+ TRACE_ITER_ANNOTATE;
/**
* trace_wake_up - wake up tasks waiting for trace input
return nsecs / 1000;
}
- /*
- * TRACE_ITER_SYM_MASK masks the options in trace_flags that
- * control the output of kernel symbols.
- */
- #define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
-
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
"stacktrace",
"sched-tree",
"ftrace_printk",
+ "ftrace_preempt",
+ "branch",
+ "annotate",
+ "userstacktrace",
+ "sym-userobj",
NULL
};
return trace_seq_putmem(s, hex, j);
}
+ static int
+ trace_seq_path(struct trace_seq *s, struct path *path)
+ {
+ unsigned char *p;
+
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+ if (!IS_ERR(p)) {
+ p = mangle_path(s->buffer + s->len, p, "\n");
+ if (p) {
+ s->len = p - s->buffer;
+ return 1;
+ }
+ } else {
+ s->buffer[s->len++] = '?';
+ return 1;
+ }
+
+ return 0;
+ }
+
static void
trace_seq_reset(struct trace_seq *s)
{
return -1;
}
+ /*
+ * When this gets called we hold the BKL which means that
+ * preemption is disabled. Various trace selftests however
+ * need to disable and enable preemption for successful tests.
+ * So we drop the BKL here and grab it after the tests again.
+ */
+ unlock_kernel();
mutex_lock(&trace_types_lock);
+
+ tracing_selftest_running = true;
+
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
}
}
+ if (!type->set_flag)
+ type->set_flag = &dummy_set_flag;
+ if (!type->flags)
+ type->flags = &dummy_tracer_flags;
+ else
+ if (!type->flags->opts)
+ type->flags->opts = dummy_tracer_opt;
+
#ifdef CONFIG_FTRACE_STARTUP_TEST
if (type->selftest) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
- int saved_ctrl = tr->ctrl;
int i;
+
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
current_trace = type;
- tr->ctrl = 0;
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
- tr->ctrl = saved_ctrl;
if (ret) {
printk(KERN_CONT "FAILED!\n");
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
- for_each_tracing_cpu(i) {
+ for_each_tracing_cpu(i)
tracing_reset(tr, i);
- }
+
printk(KERN_CONT "PASSED\n");
}
#endif
max_tracer_type_len = len;
out:
+ tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
+ lock_kernel();
return ret;
}
cmdline_idx = 0;
}
+ static int trace_stop_count;
+ static DEFINE_SPINLOCK(tracing_start_lock);
+
+ /**
+ * ftrace_off_permanent - disable all ftrace code permanently
+ *
+ * This should only be called when a serious anomally has
+ * been detected. This will turn off the function tracing,
+ * ring buffers, and other tracing utilites. It takes no
+ * locks and can be called from any context.
+ */
+ void ftrace_off_permanent(void)
+ {
+ tracing_disabled = 1;
+ ftrace_stop();
+ tracing_off_permanent();
+ }
+
+ /**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+ void tracing_start(void)
+ {
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ if (tracing_disabled)
+ return;
+
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (--trace_stop_count)
+ goto out;
+
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ goto out;
+ }
+
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ ftrace_start();
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
+
+ /**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+ void tracing_stop(void)
+ {
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ ftrace_stop();
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (trace_stop_count++)
+ goto out;
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+ }
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
spin_unlock(&trace_cmdline_lock);
}
- static char *trace_find_cmdline(int pid)
+ char *trace_find_cmdline(int pid)
{
char *cmdline = "<...>";
unsigned map;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->tgid = (tsk) ? tsk->tgid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
}
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static void __trace_graph_entry(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ent *trace,
+ unsigned long flags,
+ int pc)
+ {
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ent_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_ENT;
+ entry->graph_ent = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+
+ static void __trace_graph_return(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_graph_ret *trace,
+ unsigned long flags,
+ int pc)
+ {
+ struct ring_buffer_event *event;
+ struct ftrace_graph_ret_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_GRAPH_RET;
+ entry->ret = *trace;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ }
+ #endif
+
void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
ftrace_trace_stack(tr, data, flags, skip, preempt_count());
}
+ static void ftrace_trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags, int pc)
+ {
+ #ifdef CONFIG_STACKTRACE
+ struct ring_buffer_event *event;
+ struct userstack_entry *entry;
+ struct stack_trace trace;
+ unsigned long irq_flags;
+
+ if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+ return;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_USER_STACK;
+
+ memset(&entry->caller, 0, sizeof(entry->caller));
+
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = 0;
+ trace.entries = entry->caller;
+
+ save_stack_trace_user(&trace);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ #endif
+ }
+
+ void __trace_userstack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags)
+ {
+ ftrace_trace_userstack(tr, data, flags, preempt_count());
+ }
+
static void
ftrace_trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
entry->arg3 = arg3;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+ ftrace_trace_userstack(tr, data, irq_flags, pc);
trace_wake_up();
}
entry->next_cpu = task_cpu(next);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 5, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
}
void
entry->next_cpu = task_cpu(wakee);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
ftrace_trace_stack(tr, data, flags, 6, pc);
+ ftrace_trace_userstack(tr, data, flags, pc);
trace_wake_up();
}
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
+ unsigned long flags;
int cpu;
int pc;
- if (tracing_disabled || !tr->ctrl)
+ if (tracing_disabled)
return;
pc = preempt_count();
- preempt_disable_notrace();
+ local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
- if (likely(!atomic_read(&data->disabled)))
+ if (likely(atomic_inc_return(&data->disabled) == 1))
ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
- preempt_enable_notrace();
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_TRACER
static void
- function_trace_call(unsigned long ip, unsigned long parent_ip)
+ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
+ }
+
+ static void
+ function_trace_call(unsigned long ip, unsigned long parent_ip)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+ }
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int trace_graph_entry(struct ftrace_graph_ent *trace)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (!ftrace_trace_task(current))
+ return 0;
+
+ if (!ftrace_graph_addr(trace->func))
+ return 0;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_entry(tr, data, trace, flags, pc);
+ }
+ /* Only do the atomic if it is not already set */
+ if (!test_tsk_trace_graph(current))
+ set_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+
+ return 1;
+ }
+
+ void trace_graph_return(struct ftrace_graph_ret *trace)
+ {
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_graph_return(tr, data, trace, flags, pc);
+ }
+ if (!trace->depth)
+ clear_tsk_trace_graph(current);
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static struct ftrace_ops trace_ops __read_mostly =
{
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
enum trace_file_type {
TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
};
static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
# define IP_FMT "%016lx"
#endif
- static int
+ int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
int ret;
return ret;
}
+ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags)
+ {
+ struct file *file = NULL;
+ unsigned long vmstart = 0;
+ int ret = 1;
+
+ if (mm) {
+ const struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma) {
+ file = vma->vm_file;
+ vmstart = vma->vm_start;
+ }
+ if (file) {
+ ret = trace_seq_path(s, &file->f_path);
+ if (ret)
+ ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
+ }
+ up_read(&mm->mmap_sem);
+ }
+ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+ }
+
+ static int
+ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+ unsigned long sym_flags)
+ {
+ struct mm_struct *mm = NULL;
+ int ret = 1;
+ unsigned int i;
+
+ if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ struct task_struct *task;
+ /*
+ * we do the lookup on the thread group leader,
+ * since individual threads might have already quit!
+ */
+ rcu_read_lock();
+ task = find_task_by_vpid(entry->ent.tgid);
+ if (task)
+ mm = get_task_mm(task);
+ rcu_read_unlock();
+ }
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ unsigned long ip = entry->caller[i];
+
+ if (ip == ULONG_MAX || !ret)
+ break;
+ if (i && ret)
+ ret = trace_seq_puts(s, " <- ");
+ if (!ip) {
+ if (ret)
+ ret = trace_seq_puts(s, "??");
+ continue;
+ }
+ if (!ret)
+ break;
+ if (ret)
+ ret = seq_print_user_ip(s, mm, ip, sym_flags);
+ }
+
+ if (mm)
+ mmput(mm);
+ return ret;
+ }
+
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
trace_seq_putc(s, '\n');
}
+ static void test_cpu_buff_start(struct trace_iterator *iter)
+ {
+ struct trace_seq *s = &iter->seq;
+
+ if (!(trace_flags & TRACE_ITER_ANNOTATE))
+ return;
+
+ if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
+ return;
+
+ if (cpu_isset(iter->cpu, iter->started))
+ return;
+
+ cpu_set(iter->cpu, iter->started);
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+ }
+
static enum print_line_t
print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
{
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
next_entry = find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
next_ts = iter->ts;
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ seq_print_userip_objs(field, s, sym_flags);
+ trace_seq_putc(s, '\n');
+ break;
+ }
default:
trace_seq_printf(s, "Unknown type %d\n", entry->type);
}
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
comm = trace_find_cmdline(iter->ent->pid);
t = ns2usecs(iter->ts);
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_GRAPH_RET: {
+ return print_graph_function(iter);
+ }
+ case TRACE_GRAPH_ENT: {
+ return print_graph_function(iter);
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
+ case TRACE_USER_STACK: {
+ struct userstack_entry *field;
+
+ trace_assign_type(field, entry);
+
+ ret = seq_print_userip_objs(field, s, sym_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_putc(s, '\n');
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ break;
+ }
}
return TRACE_TYPE_HANDLED;
}
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
break;
}
case TRACE_SPECIAL:
+ case TRACE_USER_STACK:
case TRACE_STACK: {
struct special_entry *field;
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
}
- if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ if (iter->trace && iter->trace->print_header)
+ iter->trace->print_header(m);
+ else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
/* print nothing if the buffers are empty */
if (trace_empty(iter))
return 0;
iter->trace = current_trace;
iter->pos = -1;
+ /* Notify the tracer early; before we stop tracing. */
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->tr->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
m->private = iter;
/* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
-
- if (iter->trace && iter->trace->open)
- iter->trace->open(iter);
+ tracing_stop();
mutex_unlock(&trace_types_lock);
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
mutex_lock(&tracing_cpumask_update_lock);
- len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+ len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
if (count - len < 2) {
count = -EINVAL;
goto out_err;
int err, cpu;
mutex_lock(&tracing_cpumask_update_lock);
- err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+ err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
if (err)
goto err_unlock;
- raw_local_irq_disable();
+ local_irq_disable();
__raw_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
}
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_enable();
+ local_irq_enable();
tracing_cpumask = tracing_cpumask_new;
};
static ssize_t
- tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+ tracing_trace_options_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ int i;
char *buf;
int r = 0;
int len = 0;
- int i;
+ u32 tracer_flags = current_trace->flags->val;
+ struct tracer_opt *trace_opts = current_trace->flags->opts;
+
/* calulate max size */
for (i = 0; trace_options[i]; i++) {
len += 3; /* "no" and space */
}
+ /*
+ * Increase the size with names of options specific
+ * of the current tracer.
+ */
+ for (i = 0; trace_opts[i].name; i++) {
+ len += strlen(trace_opts[i].name);
+ len += 3; /* "no" and space */
+ }
+
/* +2 for \n and \0 */
buf = kmalloc(len + 2, GFP_KERNEL);
if (!buf)
r += sprintf(buf + r, "no%s ", trace_options[i]);
}
+ for (i = 0; trace_opts[i].name; i++) {
+ if (tracer_flags & trace_opts[i].bit)
+ r += sprintf(buf + r, "%s ",
+ trace_opts[i].name);
+ else
+ r += sprintf(buf + r, "no%s ",
+ trace_opts[i].name);
+ }
+
r += sprintf(buf + r, "\n");
WARN_ON(r >= len + 2);
return r;
}
+ /* Try to assign a tracer specific option */
+ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
+ {
+ struct tracer_flags *trace_flags = trace->flags;
+ struct tracer_opt *opts = NULL;
+ int ret = 0, i = 0;
+ int len;
+
+ for (i = 0; trace_flags->opts[i].name; i++) {
+ opts = &trace_flags->opts[i];
+ len = strlen(opts->name);
+
+ if (strncmp(cmp, opts->name, len) == 0) {
+ ret = trace->set_flag(trace_flags->val,
+ opts->bit, !neg);
+ break;
+ }
+ }
+ /* Not found */
+ if (!trace_flags->opts[i].name)
+ return -EINVAL;
+
+ /* Refused to handle */
+ if (ret)
+ return ret;
+
+ if (neg)
+ trace_flags->val &= ~opts->bit;
+ else
+ trace_flags->val |= opts->bit;
+
+ return 0;
+ }
+
static ssize_t
- tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
char *cmp = buf;
int neg = 0;
+ int ret;
int i;
if (cnt >= sizeof(buf))
break;
}
}
- /*
- * If no option could be set, return an error:
- */
- if (!trace_options[i])
- return -EINVAL;
+
+ /* If no option could be set, test the specific tracer options */
+ if (!trace_options[i]) {
+ ret = set_tracer_option(current_trace, cmp, neg);
+ if (ret)
+ return ret;
+ }
filp->f_pos += cnt;
static struct file_operations tracing_iter_fops = {
.open = tracing_open_generic,
- .read = tracing_iter_ctrl_read,
- .write = tracing_iter_ctrl_write,
+ .read = tracing_trace_options_read,
+ .write = tracing_trace_options_write,
};
static const char readme_msg[] =
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
- "# cat /debug/tracing/iter_ctrl\n"
+ "# cat /debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
- "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
"echo 0 > /debug/tracing/tracing_enabled\n"
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
- static ssize_t
- tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+ static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- ret = cnt;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
if (t == current_trace)
goto out;
+ trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
current_trace = t;
- if (t->init)
- t->init(tr);
+ if (t->init) {
+ ret = t->init(tr);
+ if (ret)
+ goto out;
+ }
+ trace_branch_enable(tr);
out:
mutex_unlock(&trace_types_lock);
- if (ret > 0)
- filp->f_pos += ret;
+ return ret;
+ }
+
+ static ssize_t
+ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+ {
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+ int err;
+
+ ret = cnt;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ err = tracing_set_tracer(buf);
+ if (err)
+ return err;
+
+ filp->f_pos += ret;
return ret;
}
return -ENOMEM;
mutex_lock(&trace_types_lock);
+
+ /* trace pipe does not show start of buffer */
+ cpus_setall(iter->started);
+
iter->tr = &global_trace;
iter->trace = current_trace;
filp->private_data = iter;
char buf[64];
int r;
- r = sprintf(buf, "%lu\n", tr->entries);
+ r = sprintf(buf, "%lu\n", tr->entries >> 10);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
unsigned long val;
char buf[64];
int ret, cpu;
- struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
return -EINVAL;
mutex_lock(&trace_types_lock);
- if (tr->ctrl) {
- cnt = -EBUSY;
- pr_info("ftrace: please disable tracing"
- " before modifying buffer size\n");
- goto out;
- }
+ tracing_stop();
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
atomic_inc(&max_tr.data[cpu]->disabled);
}
+ /* value is in KB */
+ val <<= 10;
+
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
atomic_dec(&max_tr.data[cpu]->disabled);
}
+ tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
int ret;
va_list args;
va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
+ ret = trace_vprintk(0, -1, fmt, args);
va_end(args);
return ret;
}
{
char *buf;
char *end;
- struct trace_array *tr = &global_trace;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled)
return -EINVAL;
if (cnt > TRACE_BUF_SIZE)
#ifdef CONFIG_DYNAMIC_FTRACE
+ int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+ {
+ return 0;
+ }
+
static ssize_t
- tracing_read_long(struct file *filp, char __user *ubuf,
+ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
- static struct file_operations tracing_read_long_fops = {
+ static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
if (!entry)
pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
- entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ entry = debugfs_create_file("trace_options", 0644, d_tracer,
NULL, &tracing_iter_fops);
if (!entry)
- pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+ pr_warning("Could not create debugfs 'trace_options' entry\n");
entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
NULL, &tracing_cpumask_fops);
pr_warning("Could not create debugfs "
"'trace_pipe' entry\n");
- entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
if (!entry)
pr_warning("Could not create debugfs "
- "'trace_entries' entry\n");
+ "'buffer_size_kb' entry\n");
entry = debugfs_create_file("trace_marker", 0220, d_tracer,
NULL, &tracing_mark_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
return 0;
}
- int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
{
static DEFINE_SPINLOCK(trace_buf_lock);
static char trace_buf[TRACE_BUF_SIZE];
struct ring_buffer_event *event;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
- struct print_entry *entry;
- unsigned long flags, irq_flags;
int cpu, len = 0, size, pc;
+ struct print_entry *entry;
+ unsigned long irq_flags;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled || tracing_selftest_running)
return 0;
pc = preempt_count();
if (unlikely(atomic_read(&data->disabled)))
goto out;
- spin_lock_irqsave(&trace_buf_lock, flags);
+ pause_graph_tracing();
+ spin_lock_irqsave(&trace_buf_lock, irq_flags);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
len = min(len, TRACE_BUF_SIZE-1);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
+ tracing_generic_entry_update(&entry->ent, irq_flags, pc);
entry->ent.type = TRACE_PRINT;
entry->ip = ip;
+ entry->depth = depth;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
out_unlock:
- spin_unlock_irqrestore(&trace_buf_lock, flags);
-
+ spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+ unpause_graph_tracing();
out:
preempt_enable_notrace();
return 0;
va_start(ap, fmt);
- ret = trace_vprintk(ip, fmt, ap);
+ ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
va_end(ap);
return ret;
}
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
atomic_inc(&global_trace.data[cpu]->disabled);
}
+ /* don't look at user memory in panic mode */
+ trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
printk(KERN_TRACE "Dumping ftrace buffer:\n");
iter.tr = &global_trace;
#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
tracing_disabled = 0;
atomic_notifier_chain_register(&panic_notifier_list,