x86, ioapic: Get rid of needless check and simplify ioapic_setup_resources()
[safe/jmp/linux-2.6] / arch / x86 / kernel / apic / io_apic.c
index 5c7630b..d836b4d 100644 (file)
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
 #include <asm/apic.h>
 
 #define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+       for (entry = head; entry; entry = entry->next)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -115,32 +118,25 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
 struct irq_pin_list {
        int apic, pin;
        struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
        struct irq_pin_list *pin;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
        return pin;
 }
 
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
 struct irq_cfg {
        struct irq_pin_list *irq_2_pin;
        cpumask_var_t domain;
@@ -179,16 +175,18 @@ int __init arch_early_irq_init(void)
        struct irq_cfg *cfg;
        struct irq_desc *desc;
        int count;
+       int node;
        int i;
 
        cfg = irq_cfgx;
        count = ARRAY_SIZE(irq_cfgx);
+       node= cpu_to_node(boot_cpu_id);
 
        for (i = 0; i < count; i++) {
                desc = irq_to_desc(i);
                desc->chip_data = &cfg[i];
-               alloc_bootmem_cpumask_var(&cfg[i].domain);
-               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+               zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
                if (i < NR_IRQS_LEGACY)
                        cpumask_setall(cfg[i].domain);
        }
@@ -209,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
        return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
        struct irq_cfg *cfg;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (cfg) {
@@ -235,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
        return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
 
        cfg = desc->chip_data;
        if (!cfg) {
-               desc->chip_data = get_one_free_irq_cfg(cpu);
+               desc->chip_data = get_one_free_irq_cfg(node);
                if (!desc->chip_data) {
                        printk(KERN_ERR "can not alloc irq_cfg\n");
                        BUG_ON(1);
@@ -253,7 +248,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
 
 /* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
        struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -262,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        if (!old_entry)
                return;
 
-       entry = get_one_free_irq_2_pin(cpu);
+       entry = get_one_free_irq_2_pin(node);
        if (!entry)
                return;
 
@@ -272,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        tail            = entry;
        old_entry       = old_entry->next;
        while (old_entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        entry = head;
                        while (entry) {
@@ -312,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                struct irq_desc *desc, int cpu)
+                                struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
        struct irq_cfg *old_cfg;
 
-       cfg = get_one_free_irq_cfg(cpu);
+       cfg = get_one_free_irq_cfg(node);
 
        if (!cfg)
                return;
@@ -328,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
        memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+       init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -417,13 +412,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
        unsigned long flags;
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       entry = cfg->irq_2_pin;
-       for (;;) {
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                unsigned int reg;
                int pin;
 
-               if (!entry)
-                       break;
                pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
@@ -431,9 +423,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
                        spin_unlock_irqrestore(&ioapic_lock, flags);
                        return true;
                }
-               if (!entry->next)
-                       break;
-               entry = entry->next;
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -465,7 +454,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-       union entry_union eu;
+       union entry_union eu = {{0, 0}};
+
        eu.entry = e;
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -495,192 +485,73 @@ static void ioapic_mask_entry(int apic, int pin)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-               cfg->move_cleanup_count = 0;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       cfg->move_cleanup_count++;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
-       cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-       int apic, pin;
-       struct irq_pin_list *entry;
-       u8 vector = cfg->vector;
-
-       entry = cfg->irq_2_pin;
-       for (;;) {
-               unsigned int reg;
-
-               if (!entry)
-                       break;
-
-               apic = entry->apic;
-               pin = entry->pin;
-               /*
-                * With interrupt-remapping, destination information comes
-                * from interrupt-remapping table entry.
-                */
-               if (!irq_remapped(irq))
-                       io_apic_write(apic, 0x11 + pin*2, dest);
-               reg = io_apic_read(apic, 0x10 + pin*2);
-               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-               reg |= vector;
-               io_apic_modify(apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = entry->next;
-       }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned int irq;
-
-       if (!cpumask_intersects(mask, cpu_online_mask))
-               return BAD_APICID;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return BAD_APICID;
-
-       cpumask_copy(desc->affinity, mask);
-
-       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static int
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int dest;
-       unsigned int irq;
-       int ret = -1;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       dest = set_desc_affinity(desc, mask);
-       if (dest != BAD_APICID) {
-               /* Only the high 8 bits are valid. */
-               dest = SET_APIC_LOGICAL_ID(dest);
-               __target_IO_APIC_irq(irq, dest, cfg);
-               ret = 0;
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return ret;
-}
-
-static int
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
-
-       return set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-       struct irq_pin_list *entry;
+       struct irq_pin_list **last, *entry;
 
-       entry = cfg->irq_2_pin;
-       if (!entry) {
-               entry = get_one_free_irq_2_pin(cpu);
-               if (!entry) {
-                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
-                                       apic, pin);
-                       return;
-               }
-               cfg->irq_2_pin = entry;
-               entry->apic = apic;
-               entry->pin = pin;
-               return;
-       }
-
-       while (entry->next) {
-               /* not again, please */
+       /* don't allow duplicates */
+       last = &cfg->irq_2_pin;
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                if (entry->apic == apic && entry->pin == pin)
-                       return;
-
-               entry = entry->next;
+                       return 0;
+               last = &entry->next;
        }
 
-       entry->next = get_one_free_irq_2_pin(cpu);
-       entry = entry->next;
+       entry = get_one_free_irq_2_pin(node);
+       if (!entry) {
+               printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+                               node, apic, pin);
+               return -ENOMEM;
+       }
        entry->apic = apic;
        entry->pin = pin;
+
+       *last = entry;
+       return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+       if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+               panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 }
 
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
-                                     int oldapic, int oldpin,
-                                     int newapic, int newpin)
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+                                          int oldapic, int oldpin,
+                                          int newapic, int newpin)
 {
-       struct irq_pin_list *entry = cfg->irq_2_pin;
-       int replaced = 0;
+       struct irq_pin_list *entry;
 
-       while (entry) {
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                if (entry->apic == oldapic && entry->pin == oldpin) {
                        entry->apic = newapic;
                        entry->pin = newpin;
-                       replaced = 1;
                        /* every one is different, right? */
-                       break;
+                       return;
                }
-               entry = entry->next;
        }
 
-       /* why? call replace before add? */
-       if (!replaced)
-               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+       /* old apic/pin didn't exist, so just add new ones */
+       add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-                               int mask_and, int mask_or,
-                               void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+                              int mask_and, int mask_or,
+                              void (*final)(struct irq_pin_list *entry))
 {
        int pin;
        struct irq_pin_list *entry;
 
-       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                unsigned int reg;
                pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -697,7 +568,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
        io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-#ifdef CONFIG_X86_64
 static void io_apic_sync(struct irq_pin_list *entry)
 {
        /*
@@ -713,11 +583,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
        io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
 
 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
@@ -730,7 +595,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
        io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
                        IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
-#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
@@ -828,7 +692,6 @@ static int __init ioapic_pirq_setup(char *str)
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
        int apic;
@@ -926,20 +789,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
        return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-       struct IO_APIC_route_entry **ioapic_entries)
-
-{
-       /*
-        * for now plain restore of previous settings.
-        * TBD: In the case of OS enabling interrupt-remapping,
-        * IO-APIC RTE's need to be setup to point to interrupt-remapping
-        * table entries. for now, do a plain restore, and wait for
-        * the setup_IO_APIC_irqs() to do proper initialization.
-        */
-       restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
        int apic;
@@ -949,7 +798,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 
        kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1010,54 +858,6 @@ static int __init find_isa_irq_apic(int irq, int type)
        return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-                           mp_irqs[i].dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1276,6 +1076,64 @@ static int pin_2_irq(int idx, int apic, int pin)
        return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG,
+                   "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+                   bus, slot, pin);
+       if (test_bit(bus, mp_bus_not_pci)) {
+               apic_printk(APIC_VERBOSE,
+                           "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
+                               break;
+
+               if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].srcbusirq & 3)) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               return irq;
+                       }
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               best_guess = irq;
+                       }
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
        /* Used to the online set of cpus does not change
@@ -1537,6 +1395,9 @@ int setup_ioapic_entry(int apic_id, int irq,
                irte.vector = vector;
                irte.dest_id = IRTE_DEST(destination);
 
+               /* Set source-id of interrupt request */
+               set_ioapic_sid(&irte, apic_id);
+
                modify_irte(irq, &irte);
 
                ir_entry->index2 = (index >> 15) & 0x1;
@@ -1606,58 +1467,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
        ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic_id, pin, idx, irq;
+       int apic_id = 0, pin, idx, irq;
        int notcon = 0;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-                       idx = find_irq_entry(apic_id, pin, mp_INT);
-                       if (idx == -1) {
-                               if (!notcon) {
-                                       notcon = 1;
-                                       apic_printk(APIC_VERBOSE,
-                                               KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               } else
-                                       apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               continue;
-                       }
-                       if (notcon) {
-                               apic_printk(APIC_VERBOSE,
-                                       " (apicid-pin) not connected\n");
-                               notcon = 0;
-                       }
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               apic_id = mp_find_ioapic(0);
+               if (apic_id < 0)
+                       apic_id = 0;
+       }
+#endif
 
-                       irq = pin_2_irq(idx, apic_id, pin);
+       for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+               idx = find_irq_entry(apic_id, pin, mp_INT);
+               if (idx == -1) {
+                       if (!notcon) {
+                               notcon = 1;
+                               apic_printk(APIC_VERBOSE,
+                                       KERN_DEBUG " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       } else
+                               apic_printk(APIC_VERBOSE, " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       continue;
+               }
+               if (notcon) {
+                       apic_printk(APIC_VERBOSE,
+                               " (apicid-pin) not connected\n");
+                       notcon = 0;
+               }
 
-                       /*
-                        * Skip the timer IRQ if there's a quirk handler
-                        * installed and if it returns 1:
-                        */
-                       if (apic->multi_timer_check &&
-                                       apic->multi_timer_check(apic_id, irq))
-                               continue;
+               irq = pin_2_irq(idx, apic_id, pin);
 
-                       desc = irq_to_desc_alloc_cpu(irq, cpu);
-                       if (!desc) {
-                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-                               continue;
-                       }
-                       cfg = desc->chip_data;
-                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+               /*
+                * Skip the timer IRQ if there's a quirk handler
+                * installed and if it returns 1:
+                */
+               if (apic->multi_timer_check &&
+                               apic->multi_timer_check(apic_id, irq))
+                       continue;
 
-                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
-                                       irq_trigger(idx), irq_polarity(idx));
+               desc = irq_to_desc_alloc_node(irq, node);
+               if (!desc) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                       continue;
                }
+               cfg = desc->chip_data;
+               add_pin_to_irq_node(cfg, node, apic_id, pin);
+               /*
+                * don't mark it in pin_programmed, so later acpi could
+                * set it correctly when irq < 16
+                */
+               setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                               irq_trigger(idx), irq_polarity(idx));
        }
 
        if (notcon)
@@ -1810,12 +1683,8 @@ __apicdebuginit(void) print_IO_APIC(void)
                if (!entry)
                        continue;
                printk(KERN_DEBUG "IRQ%d ", irq);
-               for (;;) {
+               for_each_irq_pin(entry, cfg->irq_2_pin)
                        printk("-> %d:%d", entry->apic, entry->pin);
-                       if (!entry->next)
-                               break;
-                       entry = entry->next;
-               }
                printk("\n");
        }
 
@@ -1824,36 +1693,30 @@ __apicdebuginit(void) print_IO_APIC(void)
        return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-       unsigned int v;
-       int i, j;
+       int i;
 
        if (apic_verbosity == APIC_QUIET)
                return;
 
-       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-       for (i = 0; i < 8; i++) {
-               v = apic_read(base + i*0x10);
-               for (j = 0; j < 32; j++) {
-                       if (v & (1<<j))
-                               printk("1");
-                       else
-                               printk("0");
-               }
-               printk("\n");
-       }
+       printk(KERN_DEBUG);
+
+       for (i = 0; i < 8; i++)
+               printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+       printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-       unsigned int v, ver, maxlvt;
+       unsigned int i, v, ver, maxlvt;
        u64 icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
 
-       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+       printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
@@ -1894,11 +1757,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
        printk(KERN_DEBUG "... APIC ISR field:\n");
-       print_APIC_bitfield(APIC_ISR);
+       print_APIC_field(APIC_ISR);
        printk(KERN_DEBUG "... APIC TMR field:\n");
-       print_APIC_bitfield(APIC_TMR);
+       print_APIC_field(APIC_TMR);
        printk(KERN_DEBUG "... APIC IRR field:\n");
-       print_APIC_bitfield(APIC_IRR);
+       print_APIC_field(APIC_IRR);
 
        if (APIC_INTEGRATED(ver)) {             /* !82489DX */
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
@@ -1935,6 +1798,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
        v = apic_read(APIC_TDCR);
        printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+       if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+               v = apic_read(APIC_EFEAT);
+               maxlvt = (v >> 16) & 0xff;
+               printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+               v = apic_read(APIC_ECTRL);
+               printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+               for (i = 0; i < maxlvt; i++) {
+                       v = apic_read(APIC_EILVTn(i));
+                       printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+               }
+       }
        printk("\n");
 }
 
@@ -1983,6 +1858,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
        print_PIC();
+
+       /* don't print out if apic is not there */
+       if (!cpu_has_apic || disable_apic)
+               return 0;
+
        print_all_local_APICs();
        print_IO_APIC();
 
@@ -2098,7 +1978,9 @@ void disable_IO_APIC(void)
        /*
         * Use virtual wire A mode when interrupt remapping is enabled.
         */
-       disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+       if (cpu_has_apic)
+               disconnect_bsp_APIC(!intr_remapping_enabled &&
+                               ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2286,58 +2168,154 @@ static int __init timer_irq_works(void)
  * This is not complete - we should be able to fake
  * an edge even if it isn't on the 8259A...
  */
-
-static unsigned int startup_ioapic_irq(unsigned int irq)
+
+static unsigned int startup_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+       struct irq_cfg *cfg;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < NR_IRQS_LEGACY) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+
+       struct irq_cfg *cfg = irq_cfg(irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+       cpumask_var_t cleanup_mask;
+
+       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+               unsigned int i;
+               cfg->move_cleanup_count = 0;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       cfg->move_cleanup_count++;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+       } else {
+               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               free_cpumask_var(cleanup_mask);
+       }
+       cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+       int apic, pin;
+       struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
+
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
+               unsigned int reg;
+
+               apic = entry->apic;
+               pin = entry->pin;
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
+               reg = io_apic_read(apic, 0x10 + pin*2);
+               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+               reg |= vector;
+               io_apic_modify(apic, 0x10 + pin*2, reg);
+       }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
-       int was_pending = 0;
-       unsigned long flags;
        struct irq_cfg *cfg;
+       unsigned int irq;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < NR_IRQS_LEGACY) {
-               disable_8259A_irq(irq);
-               if (i8259A_irq_pending(irq))
-                       was_pending = 1;
-       }
-       cfg = irq_cfg(irq);
-       __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return BAD_APICID;
 
-       return was_pending;
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
+               return BAD_APICID;
+
+       cpumask_copy(desc->affinity, mask);
+
+       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
 }
 
-#ifdef CONFIG_X86_64
-static int ioapic_retrigger_irq(unsigned int irq)
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
-
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_cfg *cfg;
        unsigned long flags;
+       unsigned int dest;
+       unsigned int irq;
+       int ret = -1;
 
-       spin_lock_irqsave(&vector_lock, flags);
-       apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       irq = desc->irq;
+       cfg = desc->chip_data;
 
-       return 1;
-}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-       apic->send_IPI_self(irq_cfg(irq)->vector);
+       spin_lock_irqsave(&ioapic_lock, flags);
+       dest = set_desc_affinity(desc, mask);
+       if (dest != BAD_APICID) {
+               /* Only the high 8 bits are valid. */
+               dest = SET_APIC_LOGICAL_ID(dest);
+               __target_IO_APIC_irq(irq, dest, cfg);
+               ret = 0;
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       return 1;
+       return ret;
 }
-#endif
 
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc;
 
-#ifdef CONFIG_SMP
+       desc = irq_to_desc(irq);
+
+       return set_ioapic_affinity_irq_desc(desc, mask);
+}
 
 #ifdef CONFIG_INTR_REMAP
 
@@ -2484,53 +2462,6 @@ static void irq_complete_move(struct irq_desc **descp)
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-       int apic, pin;
-       struct irq_pin_list *entry;
-
-       entry = cfg->irq_2_pin;
-       for (;;) {
-
-               if (!entry)
-                       break;
-
-               apic = entry->apic;
-               pin = entry->pin;
-               io_apic_eoi(apic, pin);
-               entry = entry->next;
-       }
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-       struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int irq;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __eoi_ioapic_irq(irq, cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       ack_x2APIC_irq();
-       eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-       ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -2545,11 +2476,8 @@ atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
        unsigned long v;
        int i;
-#endif
        struct irq_cfg *cfg;
        int do_unmask_irq = 0;
 
@@ -2562,31 +2490,28 @@ static void ack_apic_level(unsigned int irq)
        }
 #endif
 
-#ifdef CONFIG_X86_32
        /*
-       * It appears there is an erratum which affects at least version 0x11
-       * of I/O APIC (that's the 82093AA and cores integrated into various
-       * chipsets).  Under certain conditions a level-triggered interrupt is
-       * erroneously delivered as edge-triggered one but the respective IRR
-       * bit gets set nevertheless.  As a result the I/O unit expects an EOI
-       * message but it will never arrive and further interrupts are blocked
-       * from the source.  The exact reason is so far unknown, but the
-       * phenomenon was observed when two consecutive interrupt requests
-       * from a given source get delivered to the same CPU and the source is
-       * temporarily disabled in between.
-       *
-       * A workaround is to simulate an EOI message manually.  We achieve it
-       * by setting the trigger mode to edge and then to level when the edge
-       * trigger mode gets detected in the TMR of a local APIC for a
-       * level-triggered interrupt.  We mask the source for the time of the
-       * operation to prevent an edge-triggered interrupt escaping meanwhile.
-       * The idea is from Manfred Spraul.  --macro
-       */
+        * It appears there is an erratum which affects at least version 0x11
+        * of I/O APIC (that's the 82093AA and cores integrated into various
+        * chipsets).  Under certain conditions a level-triggered interrupt is
+        * erroneously delivered as edge-triggered one but the respective IRR
+        * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+        * message but it will never arrive and further interrupts are blocked
+        * from the source.  The exact reason is so far unknown, but the
+        * phenomenon was observed when two consecutive interrupt requests
+        * from a given source get delivered to the same CPU and the source is
+        * temporarily disabled in between.
+        *
+        * A workaround is to simulate an EOI message manually.  We achieve it
+        * by setting the trigger mode to edge and then to level when the edge
+        * trigger mode gets detected in the TMR of a local APIC for a
+        * level-triggered interrupt.  We mask the source for the time of the
+        * operation to prevent an edge-triggered interrupt escaping meanwhile.
+        * The idea is from Manfred Spraul.  --macro
+        */
        cfg = desc->chip_data;
        i = cfg->vector;
-
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
 
        /*
         * We must acknowledge the irq before we move it or the acknowledge will
@@ -2594,9 +2519,6 @@ static void ack_apic_level(unsigned int irq)
         */
        ack_APIC_irq();
 
-       if (irq_remapped(irq))
-               eoi_ioapic_irq(desc);
-
        /* Now we can move and renable the irq */
        if (unlikely(do_unmask_irq)) {
                /* Only migrate the irq if the ack has been received.
@@ -2631,7 +2553,7 @@ static void ack_apic_level(unsigned int irq)
                unmask_IO_APIC_irq_desc(desc);
        }
 
-#ifdef CONFIG_X86_32
+       /* Tail end of version 0x11 I/O APIC bug workaround */
        if (!(v & (1 << (i & 0x1f)))) {
                atomic_inc(&irq_mis_count);
                spin_lock(&ioapic_lock);
@@ -2639,26 +2561,43 @@ static void ack_apic_level(unsigned int irq)
                __unmask_and_level_IO_APIC_irq(cfg);
                spin_unlock(&ioapic_lock);
        }
-#endif
 }
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry;
+
+       for_each_irq_pin(entry, cfg->irq_2_pin)
+               io_apic_eoi(entry->apic, entry->pin);
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+       struct irq_cfg *cfg;
+       unsigned long flags;
+       unsigned int irq;
+
+       irq = desc->irq;
+       cfg = desc->chip_data;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __eoi_ioapic_irq(irq, cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+       ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       ack_APIC_irq();
+       eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2863,7 +2802,7 @@ static inline void __init check_timer(void)
 {
        struct irq_desc *desc = irq_to_desc(0);
        struct irq_cfg *cfg = desc->chip_data;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -2929,7 +2868,7 @@ static inline void __init check_timer(void)
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+                       add_pin_to_irq_node(cfg, node, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                } else {
                        /* for edge trigger, setup_IO_APIC_irq already
@@ -2966,7 +2905,7 @@ static inline void __init check_timer(void)
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
@@ -3178,14 +3117,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
        /* Allocate an unused irq */
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
        struct irq_cfg *cfg_new = NULL;
-       int cpu = boot_cpu_id;
        struct irq_desc *desc_new = NULL;
 
        irq = 0;
@@ -3194,7 +3132,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
        spin_lock_irqsave(&vector_lock, flags);
        for (new = irq_want; new < nr_irqs; new++) {
-               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               desc_new = irq_to_desc_alloc_node(new, node);
                if (!desc_new) {
                        printk(KERN_INFO "can not get irq_desc for %d\n", new);
                        continue;
@@ -3203,6 +3141,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
                if (cfg_new->vector != 0)
                        continue;
+
+               desc_new = move_irq_desc(desc_new, node);
+
                if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
                        irq = new;
                break;
@@ -3220,11 +3161,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
+       int node = cpu_to_node(boot_cpu_id);
        unsigned int irq_want;
        int irq;
 
        irq_want = nr_irqs_gsi;
-       irq = create_irq_nr(irq_want);
+       irq = create_irq_nr(irq_want, node);
 
        if (irq == 0)
                irq = -1;
@@ -3243,8 +3185,7 @@ void destroy_irq(unsigned int irq)
        cfg = desc->chip_data;
        dynamic_irq_cleanup(irq);
        /* connect back irq_cfg */
-       if (desc)
-               desc->chip_data = cfg;
+       desc->chip_data = cfg;
 
        free_irte(irq);
        spin_lock_irqsave(&vector_lock, flags);
@@ -3289,6 +3230,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
                irte.vector = cfg->vector;
                irte.dest_id = IRTE_DEST(dest);
 
+               /* Set source-id of interrupt request */
+               set_msi_sid(&irte, pdev);
+
                modify_irte(irq, &irte);
 
                msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3482,15 +3426,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        unsigned int irq_want;
        struct intel_iommu *iommu = NULL;
        int index = 0;
+       int node;
 
        /* x86 doesn't support multiple MSI yet */
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1;
 
+       node = dev_to_node(&dev->dev);
        irq_want = nr_irqs_gsi;
        sub_handle = 0;
        list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want);
+               irq = create_irq_nr(irq_want, node);
                if (irq == 0)
                        return -1;
                irq_want = irq + 1;
@@ -3567,7 +3513,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
        .name = "DMAR_MSI",
        .unmask = dmar_msi_unmask,
        .mask = dmar_msi_mask,
@@ -3764,6 +3710,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
        unsigned long flags;
        int err;
 
+       BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
        cfg = irq_cfg(irq);
 
        err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3777,15 +3725,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->vector = cfg->vector;
-       entry->delivery_mode = apic->irq_delivery_mode;
-       entry->dest_mode = apic->irq_dest_mode;
-       entry->polarity = 0;
-       entry->trigger = 0;
-       entry->mask = 0;
-       entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+       entry->vector           = cfg->vector;
+       entry->delivery_mode    = apic->irq_delivery_mode;
+       entry->dest_mode        = apic->irq_dest_mode;
+       entry->polarity         = 0;
+       entry->trigger          = 0;
+       entry->mask             = 0;
+       entry->dest             = apic->cpu_mask_to_apicid(eligible_cpu);
 
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3803,10 +3749,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
        struct uv_IO_APIC_route_entry *entry;
        int mmr_pnode;
 
+       BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
        entry->mask = 1;
 
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3870,6 +3816,75 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int node;
+       int ioapic, pin;
+       int trigger, polarity;
+
+       ioapic = irq_attr->ioapic;
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       if (dev)
+               node = dev_to_node(dev);
+       else
+               node = cpu_to_node(boot_cpu_id);
+
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
+       pin = irq_attr->ioapic_pin;
+       trigger = irq_attr->trigger;
+       polarity = irq_attr->polarity;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+                       printk(KERN_INFO "can not add pin %d for irq %d\n",
+                               pin, irq);
+                       return 0;
+               }
+       }
+
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+       return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int ioapic, pin;
+       /*
+        * Avoid pin reprogramming.  PRTs typically include entries
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       ioapic = irq_attr->ioapic;
+       pin = irq_attr->ioapic_pin;
+       if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[ioapic].apicid, pin);
+               return 0;
+       }
+       set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+       return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3950,6 +3965,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
        return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -3962,39 +3978,6 @@ int __init io_apic_get_version(int ioapic)
 
        return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       if (!IO_APIC_IRQ(irq)) {
-               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
-       if (!desc) {
-               printk(KERN_INFO "can not get irq_desc %d\n", irq);
-               return 0;
-       }
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= NR_IRQS_LEGACY) {
-               cfg = desc->chip_data;
-               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-       }
-
-       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-       return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4025,51 +4008,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-       int pin, ioapic, irq, irq_entry;
+       int pin, ioapic = 0, irq, irq_entry;
        struct irq_desc *desc;
-       struct irq_cfg *cfg;
        const struct cpumask *mask;
 
        if (skip_ioapic_setup == 1)
                return;
 
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-
-                       /* setup_IO_APIC_irqs could fail to get vector for some device
-                        * when you have too many devices, because at that time only boot
-                        * cpu is online.
-                        */
-                       desc = irq_to_desc(irq);
-                       cfg = desc->chip_data;
-                       if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
-                                                 irq_trigger(irq_entry),
-                                                 irq_polarity(irq_entry));
-                               continue;
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               ioapic = mp_find_ioapic(0);
+               if (ioapic < 0)
+                       ioapic = 0;
+       }
+#endif
 
-                       }
+       for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+               irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+               if (irq_entry == -1)
+                       continue;
+               irq = pin_2_irq(irq_entry, ioapic, pin);
 
-                       /*
-                        * Honour affinities which have been set in early boot
-                        */
-                       if (desc->status &
-                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
-                       else
-                               mask = apic->target_cpus();
+               desc = irq_to_desc(irq);
 
-                       if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq_desc(desc, mask);
-                       else
-                               set_ioapic_affinity_irq_desc(desc, mask);
-               }
+               /*
+                * Honour affinities which have been set in early boot
+                */
+               if (desc->status &
+                   (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                       mask = desc->affinity;
+               else
+                       mask = apic->target_cpus();
 
+               if (intr_remapping_enabled)
+                       set_ir_ioapic_affinity_irq_desc(desc, mask);
+               else
+                       set_ioapic_affinity_irq_desc(desc, mask);
        }
+
 }
 #endif
 
@@ -4077,7 +4053,7 @@ void __init setup_ioapic_dest(void)
 
 static struct resource *ioapic_resources;
 
-static struct resource * __init ioapic_setup_resources(void)
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 {
        unsigned long n;
        struct resource *res;
@@ -4093,15 +4069,13 @@ static struct resource * __init ioapic_setup_resources(void)
        mem = alloc_bootmem(n);
        res = (void *)mem;
 
-       if (mem != NULL) {
-               mem += sizeof(struct resource) * nr_ioapics;
+       mem += sizeof(struct resource) * nr_ioapics;
 
-               for (i = 0; i < nr_ioapics; i++) {
-                       res[i].name = mem;
-                       res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-                       sprintf(mem,  "IOAPIC %u", i);
-                       mem += IOAPIC_RESOURCE_NAME_SIZE;
-               }
+       for (i = 0; i < nr_ioapics; i++) {
+               res[i].name = mem;
+               res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               sprintf(mem,  "IOAPIC %u", i);
+               mem += IOAPIC_RESOURCE_NAME_SIZE;
        }
 
        ioapic_resources = res;
@@ -4115,7 +4089,7 @@ void __init ioapic_init_mappings(void)
        struct resource *ioapic_res;
        int i;
 
-       ioapic_res = ioapic_setup_resources();
+       ioapic_res = ioapic_setup_resources(nr_ioapics);
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
                        ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4144,11 +4118,9 @@ fake_ioapic_page:
                            __fix_to_virt(idx), ioapic_phys);
                idx++;
 
-               if (ioapic_res != NULL) {
-                       ioapic_res->start = ioapic_phys;
-                       ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-                       ioapic_res++;
-               }
+               ioapic_res->start = ioapic_phys;
+               ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+               ioapic_res++;
        }
 }