x86: Sanitize smp_record and move it to x86_init_ops
[safe/jmp/linux-2.6] / arch / x86 / kernel / apic / io_apic.c
index 65b824c..d2ed6c5 100644 (file)
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
@@ -129,12 +130,9 @@ struct irq_pin_list {
        struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
        struct irq_pin_list *pin;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -148,9 +146,6 @@ struct irq_cfg {
        unsigned move_cleanup_count;
        u8 vector;
        u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-       u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -182,16 +177,18 @@ int __init arch_early_irq_init(void)
        struct irq_cfg *cfg;
        struct irq_desc *desc;
        int count;
+       int node;
        int i;
 
        cfg = irq_cfgx;
        count = ARRAY_SIZE(irq_cfgx);
+       node= cpu_to_node(boot_cpu_id);
 
        for (i = 0; i < count; i++) {
                desc = irq_to_desc(i);
                desc->chip_data = &cfg[i];
-               alloc_bootmem_cpumask_var(&cfg[i].domain);
-               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+               zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
                if (i < NR_IRQS_LEGACY)
                        cpumask_setall(cfg[i].domain);
        }
@@ -212,12 +209,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
        return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
        struct irq_cfg *cfg;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (cfg) {
@@ -238,13 +232,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
        return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
 
        cfg = desc->chip_data;
        if (!cfg) {
-               desc->chip_data = get_one_free_irq_cfg(cpu);
+               desc->chip_data = get_one_free_irq_cfg(node);
                if (!desc->chip_data) {
                        printk(KERN_ERR "can not alloc irq_cfg\n");
                        BUG_ON(1);
@@ -254,10 +248,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
        return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
        struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -266,7 +259,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        if (!old_entry)
                return;
 
-       entry = get_one_free_irq_2_pin(cpu);
+       entry = get_one_free_irq_2_pin(node);
        if (!entry)
                return;
 
@@ -276,7 +269,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        tail            = entry;
        old_entry       = old_entry->next;
        while (old_entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        entry = head;
                        while (entry) {
@@ -316,12 +309,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                struct irq_desc *desc, int cpu)
+                                struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
        struct irq_cfg *old_cfg;
 
-       cfg = get_one_free_irq_cfg(cpu);
+       cfg = get_one_free_irq_cfg(node);
 
        if (!cfg)
                return;
@@ -332,7 +325,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
        memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+       init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +349,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
                old_desc->chip_data = NULL;
        }
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-
-       if (!cfg->move_in_progress) {
-               /* it means that domain is not changed */
-               if (!cpumask_intersects(desc->affinity, mask))
-                       cfg->move_desc_pending = 1;
-       }
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +359,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
        unsigned int index;
        unsigned int unused[3];
@@ -488,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-       union entry_union eu;
+       union entry_union eu = {{0, 0}};
+
        eu.entry = e;
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -523,13 +498,13 @@ static void ioapic_mask_entry(int apic, int pin)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
        struct irq_pin_list *entry;
 
        entry = cfg->irq_2_pin;
        if (!entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
                                        apic, pin);
@@ -549,7 +524,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
                entry = entry->next;
        }
 
-       entry->next = get_one_free_irq_2_pin(cpu);
+       entry->next = get_one_free_irq_2_pin(node);
        entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
@@ -558,7 +533,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
 {
@@ -578,7 +553,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 
        /* why? call replace before add? */
        if (!replaced)
-               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+               add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -902,54 +877,6 @@ static int __init find_isa_irq_apic(int irq, int type)
        return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-                           mp_irqs[i].dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1168,6 +1095,64 @@ static int pin_2_irq(int idx, int apic, int pin)
        return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG,
+                   "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+                   bus, slot, pin);
+       if (test_bit(bus, mp_bus_not_pci)) {
+               apic_printk(APIC_VERBOSE,
+                           "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
+                               break;
+
+               if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].srcbusirq & 3)) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               return irq;
+                       }
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               best_guess = irq;
+                       }
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
        /* Used to the online set of cpus does not change
@@ -1429,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq,
                irte.vector = vector;
                irte.dest_id = IRTE_DEST(destination);
 
+               /* Set source-id of interrupt request */
+               set_ioapic_sid(&irte, apic_id);
+
                modify_irte(irq, &irte);
 
                ir_entry->index2 = (index >> 15) & 0x1;
@@ -1498,58 +1486,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
        ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic_id, pin, idx, irq;
+       int apic_id = 0, pin, idx, irq;
        int notcon = 0;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-                       idx = find_irq_entry(apic_id, pin, mp_INT);
-                       if (idx == -1) {
-                               if (!notcon) {
-                                       notcon = 1;
-                                       apic_printk(APIC_VERBOSE,
-                                               KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               } else
-                                       apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               continue;
-                       }
-                       if (notcon) {
-                               apic_printk(APIC_VERBOSE,
-                                       " (apicid-pin) not connected\n");
-                               notcon = 0;
-                       }
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               apic_id = mp_find_ioapic(0);
+               if (apic_id < 0)
+                       apic_id = 0;
+       }
+#endif
 
-                       irq = pin_2_irq(idx, apic_id, pin);
+       for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+               idx = find_irq_entry(apic_id, pin, mp_INT);
+               if (idx == -1) {
+                       if (!notcon) {
+                               notcon = 1;
+                               apic_printk(APIC_VERBOSE,
+                                       KERN_DEBUG " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       } else
+                               apic_printk(APIC_VERBOSE, " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       continue;
+               }
+               if (notcon) {
+                       apic_printk(APIC_VERBOSE,
+                               " (apicid-pin) not connected\n");
+                       notcon = 0;
+               }
 
-                       /*
-                        * Skip the timer IRQ if there's a quirk handler
-                        * installed and if it returns 1:
-                        */
-                       if (apic->multi_timer_check &&
-                                       apic->multi_timer_check(apic_id, irq))
-                               continue;
+               irq = pin_2_irq(idx, apic_id, pin);
 
-                       desc = irq_to_desc_alloc_cpu(irq, cpu);
-                       if (!desc) {
-                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-                               continue;
-                       }
-                       cfg = desc->chip_data;
-                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+               /*
+                * Skip the timer IRQ if there's a quirk handler
+                * installed and if it returns 1:
+                */
+               if (apic->multi_timer_check &&
+                               apic->multi_timer_check(apic_id, irq))
+                       continue;
 
-                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
-                                       irq_trigger(idx), irq_polarity(idx));
+               desc = irq_to_desc_alloc_node(irq, node);
+               if (!desc) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                       continue;
                }
+               cfg = desc->chip_data;
+               add_pin_to_irq_node(cfg, node, apic_id, pin);
+               /*
+                * don't mark it in pin_programmed, so later acpi could
+                * set it correctly when irq < 16
+                */
+               setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                               irq_trigger(idx), irq_polarity(idx));
        }
 
        if (notcon)
@@ -1716,25 +1716,19 @@ __apicdebuginit(void) print_IO_APIC(void)
        return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-       unsigned int v;
-       int i, j;
+       int i;
 
        if (apic_verbosity == APIC_QUIET)
                return;
 
-       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-       for (i = 0; i < 8; i++) {
-               v = apic_read(base + i*0x10);
-               for (j = 0; j < 32; j++) {
-                       if (v & (1<<j))
-                               printk("1");
-                       else
-                               printk("0");
-               }
-               printk("\n");
-       }
+       printk(KERN_DEBUG);
+
+       for (i = 0; i < 8; i++)
+               printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+       printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1745,7 +1739,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        if (apic_verbosity == APIC_QUIET)
                return;
 
-       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+       printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
@@ -1786,11 +1780,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
        printk(KERN_DEBUG "... APIC ISR field:\n");
-       print_APIC_bitfield(APIC_ISR);
+       print_APIC_field(APIC_ISR);
        printk(KERN_DEBUG "... APIC TMR field:\n");
-       print_APIC_bitfield(APIC_TMR);
+       print_APIC_field(APIC_TMR);
        printk(KERN_DEBUG "... APIC IRR field:\n");
-       print_APIC_bitfield(APIC_IRR);
+       print_APIC_field(APIC_IRR);
 
        if (APIC_INTEGRATED(ver)) {             /* !82489DX */
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
@@ -1887,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
        print_PIC();
+
+       /* don't print out if apic is not there */
+       if (!cpu_has_apic || disable_apic)
+               return 0;
+
        print_all_local_APICs();
        print_IO_APIC();
 
@@ -2002,7 +2001,9 @@ void disable_IO_APIC(void)
        /*
         * Use virtual wire A mode when interrupt remapping is enabled.
         */
-       disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+       if (cpu_has_apic)
+               disconnect_bsp_APIC(!intr_remapping_enabled &&
+                               ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2262,8 +2263,7 @@ static void send_cleanup_vector(struct irq_cfg *cfg)
        cfg->move_in_progress = 0;
 }
 
-static void
-__target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
        int apic, pin;
        struct irq_pin_list *entry;
@@ -2294,6 +2294,9 @@ __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
        }
 }
 
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
 /*
  * Either sets desc->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
@@ -2313,21 +2316,19 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
        if (assign_irq_vector(irq, cfg, mask))
                return BAD_APICID;
 
-       /* check that before desc->addinity get updated */
-       set_extra_move_desc(desc, mask);
-
        cpumask_copy(desc->affinity, mask);
 
        return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
 }
 
-static void
+static int
 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
        struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        unsigned int irq;
+       int ret = -1;
 
        irq = desc->irq;
        cfg = desc->chip_data;
@@ -2338,18 +2339,21 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
                /* Only the high 8 bits are valid. */
                dest = SET_APIC_LOGICAL_ID(dest);
                __target_IO_APIC_irq(irq, dest, cfg);
+               ret = 0;
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return ret;
 }
 
-static void
+static int
 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc;
 
        desc = irq_to_desc(irq);
 
-       set_ioapic_affinity_irq_desc(desc, mask);
+       return set_ioapic_affinity_irq_desc(desc, mask);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2365,26 +2369,25 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
        struct irq_cfg *cfg;
        struct irte irte;
        unsigned int dest;
        unsigned int irq;
+       int ret = -1;
 
        if (!cpumask_intersects(mask, cpu_online_mask))
-               return;
+               return ret;
 
        irq = desc->irq;
        if (get_irte(irq, &irte))
-               return;
+               return ret;
 
        cfg = desc->chip_data;
        if (assign_irq_vector(irq, cfg, mask))
-               return;
-
-       set_extra_move_desc(desc, mask);
+               return ret;
 
        dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2400,27 +2403,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
                send_cleanup_vector(cfg);
 
        cpumask_copy(desc->affinity, mask);
+
+       return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                            const struct cpumask *mask)
 {
-       migrate_ioapic_irq_desc(desc, mask);
+       return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
                                       const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       set_ir_ioapic_affinity_irq_desc(desc, mask);
+       return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                                   const struct cpumask *mask)
 {
+       return 0;
 }
 #endif
 
@@ -2482,34 +2488,14 @@ static void irq_complete_move(struct irq_desc **descp)
        struct irq_cfg *cfg = desc->chip_data;
        unsigned vector, me;
 
-       if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               if (likely(!cfg->move_desc_pending))
-                       return;
-
-               /* domain has not changed, but affinity did */
-               me = smp_processor_id();
-               if (cpumask_test_cpu(me, desc->affinity)) {
-                       *descp = desc = move_irq_desc(desc, me);
-                       /* get the new one */
-                       cfg = desc->chip_data;
-                       cfg->move_desc_pending = 0;
-               }
-#endif
+       if (likely(!cfg->move_in_progress))
                return;
-       }
 
        vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
 
-       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               *descp = desc = move_irq_desc(desc, me);
-               /* get the new one */
-               cfg = desc->chip_data;
-#endif
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                send_cleanup_vector(cfg);
-       }
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -2872,7 +2858,7 @@ static inline void __init check_timer(void)
 {
        struct irq_desc *desc = irq_to_desc(0);
        struct irq_cfg *cfg = desc->chip_data;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -2938,7 +2924,7 @@ static inline void __init check_timer(void)
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+                       add_pin_to_irq_node(cfg, node, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                } else {
                        /* for edge trigger, setup_IO_APIC_irq already
@@ -2975,7 +2961,7 @@ static inline void __init check_timer(void)
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
@@ -3187,14 +3173,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
        /* Allocate an unused irq */
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
        struct irq_cfg *cfg_new = NULL;
-       int cpu = boot_cpu_id;
        struct irq_desc *desc_new = NULL;
 
        irq = 0;
@@ -3203,7 +3188,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
        spin_lock_irqsave(&vector_lock, flags);
        for (new = irq_want; new < nr_irqs; new++) {
-               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               desc_new = irq_to_desc_alloc_node(new, node);
                if (!desc_new) {
                        printk(KERN_INFO "can not get irq_desc for %d\n", new);
                        continue;
@@ -3212,6 +3197,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
                if (cfg_new->vector != 0)
                        continue;
+
+               desc_new = move_irq_desc(desc_new, node);
+
                if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
                        irq = new;
                break;
@@ -3229,11 +3217,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
+       int node = cpu_to_node(boot_cpu_id);
        unsigned int irq_want;
        int irq;
 
        irq_want = nr_irqs_gsi;
-       irq = create_irq_nr(irq_want);
+       irq = create_irq_nr(irq_want, node);
 
        if (irq == 0)
                irq = -1;
@@ -3298,6 +3287,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
                irte.vector = cfg->vector;
                irte.dest_id = IRTE_DEST(dest);
 
+               /* Set source-id of interrupt request */
+               set_msi_sid(&irte, pdev);
+
                modify_irte(irq, &irte);
 
                msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3335,7 +3327,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3344,7 +3336,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3356,13 +3348,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg_desc(desc, &msg);
+
+       return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -3371,11 +3365,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        struct irte irte;
 
        if (get_irte(irq, &irte))
-               return;
+               return -1;
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        irte.vector = cfg->vector;
        irte.dest_id = IRTE_DEST(dest);
@@ -3392,6 +3386,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
         */
        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);
+
+       return 0;
 }
 
 #endif
@@ -3487,15 +3483,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        unsigned int irq_want;
        struct intel_iommu *iommu = NULL;
        int index = 0;
+       int node;
 
        /* x86 doesn't support multiple MSI yet */
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1;
 
+       node = dev_to_node(&dev->dev);
        irq_want = nr_irqs_gsi;
        sub_handle = 0;
        list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want);
+               irq = create_irq_nr(irq_want, node);
                if (irq == 0)
                        return -1;
                irq_want = irq + 1;
@@ -3545,7 +3543,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3554,7 +3552,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3566,11 +3564,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
        .name = "DMAR_MSI",
        .unmask = dmar_msi_unmask,
        .mask = dmar_msi_mask,
@@ -3599,7 +3599,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3608,7 +3608,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3620,6 +3620,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        hpet_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3676,7 +3678,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
        write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3684,11 +3686,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
        target_ht_irq(irq, dest, cfg->vector);
+
+       return 0;
 }
 
 #endif
@@ -3789,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+
        return irq;
 }
 
@@ -3869,6 +3876,71 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int node;
+       int ioapic, pin;
+       int trigger, polarity;
+
+       ioapic = irq_attr->ioapic;
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       if (dev)
+               node = dev_to_node(dev);
+       else
+               node = cpu_to_node(boot_cpu_id);
+
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
+       pin = irq_attr->ioapic_pin;
+       trigger = irq_attr->trigger;
+       polarity = irq_attr->polarity;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_node(cfg, node, ioapic, pin);
+       }
+
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+       return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int ioapic, pin;
+       /*
+        * Avoid pin reprogramming.  PRTs typically include entries
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       ioapic = irq_attr->ioapic;
+       pin = irq_attr->ioapic_pin;
+       if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[ioapic].apicid, pin);
+               return 0;
+       }
+       set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+       return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3949,6 +4021,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
        return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -3961,39 +4034,6 @@ int __init io_apic_get_version(int ioapic)
 
        return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       if (!IO_APIC_IRQ(irq)) {
-               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
-       if (!desc) {
-               printk(KERN_INFO "can not get irq_desc %d\n", irq);
-               return 0;
-       }
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= NR_IRQS_LEGACY) {
-               cfg = desc->chip_data;
-               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-       }
-
-       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-       return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4024,51 +4064,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-       int pin, ioapic, irq, irq_entry;
+       int pin, ioapic = 0, irq, irq_entry;
        struct irq_desc *desc;
-       struct irq_cfg *cfg;
        const struct cpumask *mask;
 
        if (skip_ioapic_setup == 1)
                return;
 
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-
-                       /* setup_IO_APIC_irqs could fail to get vector for some device
-                        * when you have too many devices, because at that time only boot
-                        * cpu is online.
-                        */
-                       desc = irq_to_desc(irq);
-                       cfg = desc->chip_data;
-                       if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
-                                                 irq_trigger(irq_entry),
-                                                 irq_polarity(irq_entry));
-                               continue;
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               ioapic = mp_find_ioapic(0);
+               if (ioapic < 0)
+                       ioapic = 0;
+       }
+#endif
 
-                       }
+       for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+               irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+               if (irq_entry == -1)
+                       continue;
+               irq = pin_2_irq(irq_entry, ioapic, pin);
 
-                       /*
-                        * Honour affinities which have been set in early boot
-                        */
-                       if (desc->status &
-                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
-                       else
-                               mask = apic->target_cpus();
+               desc = irq_to_desc(irq);
 
-                       if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq_desc(desc, mask);
-                       else
-                               set_ioapic_affinity_irq_desc(desc, mask);
-               }
+               /*
+                * Honour affinities which have been set in early boot
+                */
+               if (desc->status &
+                   (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                       mask = desc->affinity;
+               else
+                       mask = apic->target_cpus();
 
+               if (intr_remapping_enabled)
+                       set_ir_ioapic_affinity_irq_desc(desc, mask);
+               else
+                       set_ioapic_affinity_irq_desc(desc, mask);
        }
+
 }
 #endif
 
@@ -4151,28 +4184,20 @@ fake_ioapic_page:
        }
 }
 
-static int __init ioapic_insert_resources(void)
+void __init ioapic_insert_resources(void)
 {
        int i;
        struct resource *r = ioapic_resources;
 
        if (!r) {
-               if (nr_ioapics > 0) {
+               if (nr_ioapics > 0)
                        printk(KERN_ERR
                                "IO APIC resources couldn't be allocated.\n");
-                       return -1;
-               }
-               return 0;
+               return;
        }
 
        for (i = 0; i < nr_ioapics; i++) {
                insert_resource(&iomem_resource, r);
                r++;
        }
-
-       return 0;
 }
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);