X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkernel%2Fapic%2Fio_apic.c;h=d836b4d347e60d0587220b5349153770090b4bd8;hb=ffc438366c2660a6a811b94ba33229bf217f8254;hp=0ad3fe77641a7b5c6b797d64e4dc4c6e359f8e13;hpb=b24696bc55f66fecc30715e003f10fc2555a9271;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0ad3fe7..d836b4d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -59,12 +59,15 @@ #include #include #include +#include #include #include #include #define __apicdebuginit(type) static type __init +#define for_each_irq_pin(entry, head) \ + for (entry = head; entry; entry = entry->next) /* * Is the SiS APIC rmw bug present ? @@ -115,32 +118,25 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_pin_list; - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - struct irq_pin_list { int apic, pin; struct irq_pin_list *next; }; -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +static struct irq_pin_list *get_one_free_irq_2_pin(int node) { struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); return pin; } +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; @@ -148,9 +144,6 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -182,16 +175,18 @@ int __init arch_early_irq_init(void) struct irq_cfg *cfg; struct irq_desc *desc; int count; + int node; int i; cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); + node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); + zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); + zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); if (i < NR_IRQS_LEGACY) cpumask_setall(cfg[i].domain); } @@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) return cfg; } -static struct irq_cfg *get_one_free_irq_cfg(int cpu) +static struct irq_cfg *get_one_free_irq_cfg(int node) { struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { @@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) return cfg; } -int arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int node) { struct irq_cfg *cfg; cfg = desc->chip_data; if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); + desc->chip_data = get_one_free_irq_cfg(node); if (!desc->chip_data) { printk(KERN_ERR "can not alloc irq_cfg\n"); BUG_ON(1); @@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) return 0; } -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - +/* for move_irq_desc */ static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) { struct irq_pin_list *old_entry, *head, *tail, *entry; @@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) if (!old_entry) return; - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) return; @@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) tail = entry; old_entry = old_entry->next; while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { entry = head; while (entry) { @@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) } void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { struct irq_cfg *cfg; struct irq_cfg *old_cfg; - cfg = get_one_free_irq_cfg(cpu); + cfg = get_one_free_irq_cfg(node); if (!cfg) return; @@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); - init_copy_irq_2_pin(old_cfg, cfg, cpu); + init_copy_irq_2_pin(old_cfg, cfg, node); } static void free_irq_cfg(struct irq_cfg *old_cfg) @@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->chip_data = NULL; } } - -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; - - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(desc->affinity, mask)) - cfg->move_desc_pending = 1; - } -} -#endif +/* end for move_irq_desc */ #else static struct irq_cfg *irq_cfg(unsigned int irq) @@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ -} -#endif - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -440,13 +412,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; - if (!entry) - break; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ @@ -454,9 +423,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) spin_unlock_irqrestore(&ioapic_lock, flags); return true; } - if (!entry->next) - break; - entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -488,7 +454,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - union entry_union eu; + union entry_union eu = {{0, 0}}; + eu.entry = e; io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); @@ -518,191 +485,73 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } -#ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - -/* - * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and - * leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned int irq; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; - - irq = desc->irq; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; - - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - - cpumask_copy(desc->affinity, mask); - - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - set_ioapic_affinity_irq_desc(desc, mask); -} -#endif /* CONFIG_SMP */ - /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static int +add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list *entry; - - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(cpu); - if (!entry) { - printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", - apic, pin); - return; - } - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - return; - } + struct irq_pin_list **last, *entry; - while (entry->next) { - /* not again, please */ + /* don't allow duplicates */ + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; + return 0; + last = &entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); - entry = entry->next; + entry = get_one_free_irq_2_pin(node); + if (!entry) { + printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); + return -ENOMEM; + } entry->apic = apic; entry->pin = pin; + + *last = entry; + return 0; +} + +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +{ + if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) + panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, - int oldapic, int oldpin, - int newapic, int newpin) +static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, + int oldapic, int oldpin, + int newapic, int newpin) { - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; + struct irq_pin_list *entry; - while (entry) { + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - replaced = 1; /* every one is different, right? */ - break; + return; } - entry = entry->next; } - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + /* old apic/pin didn't exist, so just add new ones */ + add_pin_to_irq_node(cfg, node, newapic, newpin); } -static inline void io_apic_modify_irq(struct irq_cfg *cfg, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) +static void io_apic_modify_irq(struct irq_cfg *cfg, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) { int pin; struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin * 2); @@ -719,7 +568,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } -#ifdef CONFIG_X86_64 static void io_apic_sync(struct irq_pin_list *entry) { /* @@ -735,11 +583,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -#else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); -} static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { @@ -752,7 +595,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } -#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { @@ -850,7 +692,6 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_INTR_REMAP struct IO_APIC_route_entry **alloc_ioapic_entries(void) { int apic; @@ -948,20 +789,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries) - -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(ioapic_entries); -} - void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) { int apic; @@ -971,7 +798,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) kfree(ioapic_entries); } -#endif /* * Find the IRQ entry number of a certain pin. @@ -1032,54 +858,6 @@ static int __init find_isa_irq_apic(int irq, int type) return -1; } -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - #if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR @@ -1298,6 +1076,64 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, + struct io_apic_irq_attr *irq_attr) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, + "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (test_bit(bus, mp_bus_not_pci)) { + apic_printk(APIC_VERBOSE, + "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].srcbusirq & 3)) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + return irq; + } + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + best_guess = irq; + } + } + } + return best_guess; +} +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + void lock_vector_lock(void) { /* Used to the online set of cpus does not change @@ -1559,6 +1395,9 @@ int setup_ioapic_entry(int apic_id, int irq, irte.vector = vector; irte.dest_id = IRTE_DEST(destination); + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, apic_id); + modify_irte(irq, &irte); ir_entry->index2 = (index >> 15) & 0x1; @@ -1628,58 +1467,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq ioapic_write_entry(apic_id, pin, entry); } +static struct { + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; + static void __init setup_IO_APIC_irqs(void) { - int apic_id, pin, idx, irq; + int apic_id = 0, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { - for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - - idx = find_irq_entry(apic_id, pin, mp_INT); - if (idx == -1) { - if (!notcon) { - notcon = 1; - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic_id].apicid, pin); - } else - apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic_id].apicid, pin); - continue; - } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + apic_id = mp_find_ioapic(0); + if (apic_id < 0) + apic_id = 0; + } +#endif - irq = pin_2_irq(idx, apic_id, pin); + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) { + if (!notcon) { + notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic_id].apicid, pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic_id].apicid, pin); + continue; + } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) - continue; + irq = pin_2_irq(idx, apic_id, pin); - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - continue; - } - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) + continue; - setup_IO_APIC_irq(apic_id, pin, irq, desc, - irq_trigger(idx), irq_polarity(idx)); + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; } + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + /* + * don't mark it in pin_programmed, so later acpi could + * set it correctly when irq < 16 + */ + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); } if (notcon) @@ -1832,12 +1683,8 @@ __apicdebuginit(void) print_IO_APIC(void) if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } printk("\n"); } @@ -1846,36 +1693,30 @@ __apicdebuginit(void) print_IO_APIC(void) return; } -__apicdebuginit(void) print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_field(int base) { - unsigned int v; - int i, j; + int i; if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ @@ -1957,6 +1798,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); v = apic_read(APIC_TDCR); printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + v = apic_read(APIC_EFEAT); + maxlvt = (v >> 16) & 0xff; + printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); + v = apic_read(APIC_ECTRL); + printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); + for (i = 0; i < maxlvt; i++) { + v = apic_read(APIC_EILVTn(i)); + printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); + } + } printk("\n"); } @@ -2005,6 +1858,11 @@ __apicdebuginit(void) print_PIC(void) __apicdebuginit(int) print_all_ICs(void) { print_PIC(); + + /* don't print out if apic is not there */ + if (!cpu_has_apic || disable_apic) + return 0; + print_all_local_APICs(); print_IO_APIC(); @@ -2120,7 +1978,9 @@ void disable_IO_APIC(void) /* * Use virtual wire A mode when interrupt remapping is enabled. */ - disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); + if (cpu_has_apic) + disconnect_bsp_APIC(!intr_remapping_enabled && + ioapic_i8259.pin != -1); } #ifdef CONFIG_X86_32 @@ -2328,7 +2188,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { @@ -2339,27 +2198,124 @@ static int ioapic_retrigger_irq(unsigned int irq) apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); - return 1; + return 1; +} + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ + +#ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + } +} + +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); + +/* + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned int irq; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; + + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return BAD_APICID; + + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); +} + +static int +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + int ret = -1; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + ret = 0; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return ret; } -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - apic->send_IPI_self(irq_cfg(irq)->vector); - return 1; -} -#endif +static int +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc; -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ + desc = irq_to_desc(irq); -#ifdef CONFIG_SMP + return set_ioapic_affinity_irq_desc(desc, mask); +} #ifdef CONFIG_INTR_REMAP @@ -2374,26 +2330,25 @@ static int ioapic_retrigger_irq(unsigned int irq) * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. */ -static void +static int migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; unsigned int dest; unsigned int irq; + int ret = -1; if (!cpumask_intersects(mask, cpu_online_mask)) - return; + return ret; irq = desc->irq; if (get_irte(irq, &irte)) - return; + return ret; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); + return ret; dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); @@ -2409,27 +2364,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) send_cleanup_vector(cfg); cpumask_copy(desc->affinity, mask); + + return 0; } /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - migrate_ioapic_irq_desc(desc, mask); + return migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, +static int set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + return set_ir_ioapic_affinity_irq_desc(desc, mask); } #else -static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { + return 0; } #endif @@ -2491,87 +2449,19 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain has not changed, but affinity did */ - me = smp_processor_id(); - if (cpumask_test_cpu(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); - } } #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif -#ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void ack_x2apic_level(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - ack_x2APIC_irq(); - eoi_ioapic_irq(desc); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -#endif - static void ack_apic_edge(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2586,11 +2476,8 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - -#ifdef CONFIG_X86_32 unsigned long v; int i; -#endif struct irq_cfg *cfg; int do_unmask_irq = 0; @@ -2603,31 +2490,28 @@ static void ack_apic_level(unsigned int irq) } #endif -#ifdef CONFIG_X86_32 /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ cfg = desc->chip_data; i = cfg->vector; - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); -#endif /* * We must acknowledge the irq before we move it or the acknowledge will @@ -2669,7 +2553,7 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq_desc(desc); } -#ifdef CONFIG_X86_32 + /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2677,9 +2561,46 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } -#endif } +#ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) + io_apic_eoi(entry->apic, entry->pin); +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void ir_ack_apic_edge(unsigned int irq) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + ack_APIC_irq(); + eoi_ioapic_irq(desc); +} +#endif /* CONFIG_INTR_REMAP */ + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -2699,8 +2620,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .ack = ir_ack_apic_edge, + .eoi = ir_ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, #endif @@ -2881,7 +2802,7 @@ static inline void __init check_timer(void) { struct irq_desc *desc = irq_to_desc(0); struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2947,7 +2868,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq_node(cfg, node, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } else { /* for edge trigger, setup_IO_APIC_irq already @@ -2984,7 +2905,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); enable_8259A_irq(0); if (timer_irq_works()) { @@ -3196,14 +3117,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int irq_want) +unsigned int create_irq_nr(unsigned int irq_want, int node) { /* Allocate an unused irq */ unsigned int irq; unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; struct irq_desc *desc_new = NULL; irq = 0; @@ -3212,7 +3132,7 @@ unsigned int create_irq_nr(unsigned int irq_want) spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - desc_new = irq_to_desc_alloc_cpu(new, cpu); + desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; @@ -3221,6 +3141,9 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; + + desc_new = move_irq_desc(desc_new, node); + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; @@ -3238,11 +3161,12 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { + int node = cpu_to_node(boot_cpu_id); unsigned int irq_want; int irq; irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) irq = -1; @@ -3261,8 +3185,7 @@ void destroy_irq(unsigned int irq) cfg = desc->chip_data; dynamic_irq_cleanup(irq); /* connect back irq_cfg */ - if (desc) - desc->chip_data = cfg; + desc->chip_data = cfg; free_irte(irq); spin_lock_irqsave(&vector_lock, flags); @@ -3307,6 +3230,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); + /* Set source-id of interrupt request */ + set_msi_sid(&irte, pdev); + modify_irte(irq, &irte); msg->address_hi = MSI_ADDR_BASE_HI; @@ -3344,7 +3270,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3353,7 +3279,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3365,13 +3291,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + + return 0; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void +static int ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -3380,11 +3308,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irte irte; if (get_irte(irq, &irte)) - return; + return -1; dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3401,6 +3329,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) */ if (cfg->move_in_progress) send_cleanup_vector(cfg); + + return 0; } #endif @@ -3426,7 +3356,7 @@ static struct irq_chip msi_ir_chip = { .unmask = unmask_msi_irq, .mask = mask_msi_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, + .ack = ir_ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, #endif @@ -3496,15 +3426,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) unsigned int irq_want; struct intel_iommu *iommu = NULL; int index = 0; + int node; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) return -1; irq_want = irq + 1; @@ -3554,7 +3486,7 @@ void arch_teardown_msi_irq(unsigned int irq) #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3563,7 +3495,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3575,11 +3507,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ -struct irq_chip dmar_msi_type = { +static struct irq_chip dmar_msi_type = { .name = "DMAR_MSI", .unmask = dmar_msi_unmask, .mask = dmar_msi_mask, @@ -3608,7 +3542,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3617,7 +3551,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3629,6 +3563,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3648,12 +3584,14 @@ int arch_setup_hpet_msi(unsigned int irq) { int ret; struct msi_msg msg; + struct irq_desc *desc = irq_to_desc(irq); ret = msi_compose_msg(NULL, irq, &msg); if (ret < 0) return ret; hpet_msi_write(irq, &msg); + desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, "edge"); @@ -3683,7 +3621,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3691,11 +3629,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; target_ht_irq(irq, dest, cfg->vector); + + return 0; } #endif @@ -3770,6 +3710,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, eligible_cpu); @@ -3783,15 +3725,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -3809,10 +3749,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) struct uv_IO_APIC_route_entry *entry; int mmr_pnode; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - entry->mask = 1; mmr_pnode = uv_blade_to_pnode(mmr_blade); @@ -3876,6 +3816,75 @@ int __init arch_probe_nr_irqs(void) } #endif +static int __io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + struct irq_desc *desc; + struct irq_cfg *cfg; + int node; + int ioapic, pin; + int trigger, polarity; + + ioapic = irq_attr->ioapic; + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + if (dev) + node = dev_to_node(dev); + else + node = cpu_to_node(boot_cpu_id); + + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + + pin = irq_attr->ioapic_pin; + trigger = irq_attr->trigger; + polarity = irq_attr->polarity; + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { + printk(KERN_INFO "can not add pin %d for irq %d\n", + pin, irq); + return 0; + } + } + + setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); + + return 0; +} + +int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + int ioapic, pin; + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + ioapic = irq_attr->ioapic; + pin = irq_attr->ioapic_pin; + if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[ioapic].apicid, pin); + return 0; + } + set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); + + return __io_apic_set_pci_routing(dev, irq, irq_attr); +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3956,6 +3965,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } +#endif int __init io_apic_get_version(int ioapic) { @@ -3968,39 +3978,6 @@ int __init io_apic_get_version(int ioapic) return reg_01.bits.version; } -#endif - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - struct irq_desc *desc; - struct irq_cfg *cfg; - int cpu = boot_cpu_id; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc %d\n", irq); - return 0; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= NR_IRQS_LEGACY) { - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); - } - - setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); - - return 0; -} - int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) { @@ -4031,51 +4008,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic, irq, irq_entry; + int pin, ioapic = 0, irq, irq_entry; struct irq_desc *desc; - struct irq_cfg *cfg; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, desc, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); - continue; +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + ioapic = 0; + } +#endif - } + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); - /* - * Honour affinities which have been set in early boot - */ - if (desc->status & - (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; - else - mask = apic->target_cpus(); + desc = irq_to_desc(irq); - if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq_desc(desc, mask); - else - set_ioapic_affinity_irq_desc(desc, mask); - } + /* + * Honour affinities which have been set in early boot + */ + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = apic->target_cpus(); + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq_desc(desc, mask); + else + set_ioapic_affinity_irq_desc(desc, mask); } + } #endif @@ -4083,7 +4053,7 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(void) +static struct resource * __init ioapic_setup_resources(int nr_ioapics) { unsigned long n; struct resource *res; @@ -4099,15 +4069,13 @@ static struct resource * __init ioapic_setup_resources(void) mem = alloc_bootmem(n); res = (void *)mem; - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * nr_ioapics; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; } ioapic_resources = res; @@ -4121,7 +4089,7 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(); + ioapic_res = ioapic_setup_resources(nr_ioapics); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].apicaddr; @@ -4150,11 +4118,9 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; } }