x86: Fix out of order of gsi
[safe/jmp/linux-2.6] / arch / x86 / kernel / apic / io_apic.c
index 8499000..97e1e3e 100644 (file)
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
+#include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
 #define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+       for (entry = head; entry; entry = entry->next)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -72,8 +73,8 @@
  */
 int sis_apic_bug = -1;
 
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -84,12 +85,18 @@ int nr_ioapic_registers[MAX_IO_APICS];
 struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
+/* IO APIC gsi routing info */
+struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];
+
 /* MP IRQ source entries */
 struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -115,92 +122,65 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
 struct irq_pin_list {
        int apic, pin;
        struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
        struct irq_pin_list *pin;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
        return pin;
 }
 
-struct irq_cfg {
-       struct irq_pin_list *irq_2_pin;
-       cpumask_var_t domain;
-       cpumask_var_t old_domain;
-       unsigned move_cleanup_count;
-       u8 vector;
-       u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-       u8 move_desc_pending : 1;
-#endif
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[] = {
+static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
 #else
-static struct irq_cfg irq_cfgx[NR_IRQS] = {
+static struct irq_cfg irq_cfgx[NR_IRQS];
 #endif
-       [0]  = { .vector = IRQ0_VECTOR,  },
-       [1]  = { .vector = IRQ1_VECTOR,  },
-       [2]  = { .vector = IRQ2_VECTOR,  },
-       [3]  = { .vector = IRQ3_VECTOR,  },
-       [4]  = { .vector = IRQ4_VECTOR,  },
-       [5]  = { .vector = IRQ5_VECTOR,  },
-       [6]  = { .vector = IRQ6_VECTOR,  },
-       [7]  = { .vector = IRQ7_VECTOR,  },
-       [8]  = { .vector = IRQ8_VECTOR,  },
-       [9]  = { .vector = IRQ9_VECTOR,  },
-       [10] = { .vector = IRQ10_VECTOR, },
-       [11] = { .vector = IRQ11_VECTOR, },
-       [12] = { .vector = IRQ12_VECTOR, },
-       [13] = { .vector = IRQ13_VECTOR, },
-       [14] = { .vector = IRQ14_VECTOR, },
-       [15] = { .vector = IRQ15_VECTOR, },
-};
+
+void __init io_apic_disable_legacy(void)
+{
+       nr_legacy_irqs = 0;
+       nr_irqs_gsi = 0;
+}
 
 int __init arch_early_irq_init(void)
 {
        struct irq_cfg *cfg;
        struct irq_desc *desc;
        int count;
+       int node;
        int i;
 
        cfg = irq_cfgx;
        count = ARRAY_SIZE(irq_cfgx);
+       node= cpu_to_node(boot_cpu_id);
 
        for (i = 0; i < count; i++) {
                desc = irq_to_desc(i);
                desc->chip_data = &cfg[i];
-               alloc_bootmem_cpumask_var(&cfg[i].domain);
-               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
-               if (i < NR_IRQS_LEGACY)
-                       cpumask_setall(cfg[i].domain);
+               zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+               zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+               /*
+                * For legacy IRQ's, start with assigning irq0 to irq15 to
+                * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+                */
+               if (i < nr_legacy_irqs) {
+                       cfg[i].vector = IRQ0_VECTOR + i;
+                       cpumask_set_cpu(0, cfg[i].domain);
+               }
        }
 
        return 0;
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
        struct irq_cfg *cfg = NULL;
        struct irq_desc *desc;
@@ -212,39 +192,33 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
        return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
        struct irq_cfg *cfg;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (cfg) {
-               if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+               if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
                        kfree(cfg);
                        cfg = NULL;
-               } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+               } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
                                                          GFP_ATOMIC, node)) {
                        free_cpumask_var(cfg->domain);
                        kfree(cfg);
                        cfg = NULL;
-               } else {
-                       cpumask_clear(cfg->domain);
-                       cpumask_clear(cfg->old_domain);
                }
        }
 
        return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
 
        cfg = desc->chip_data;
        if (!cfg) {
-               desc->chip_data = get_one_free_irq_cfg(cpu);
+               desc->chip_data = get_one_free_irq_cfg(node);
                if (!desc->chip_data) {
                        printk(KERN_ERR "can not alloc irq_cfg\n");
                        BUG_ON(1);
@@ -254,10 +228,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
        return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
        struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -266,7 +239,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        if (!old_entry)
                return;
 
-       entry = get_one_free_irq_2_pin(cpu);
+       entry = get_one_free_irq_2_pin(node);
        if (!entry)
                return;
 
@@ -276,7 +249,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        tail            = entry;
        old_entry       = old_entry->next;
        while (old_entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        entry = head;
                        while (entry) {
@@ -316,12 +289,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                struct irq_desc *desc, int cpu)
+                                struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
        struct irq_cfg *old_cfg;
 
-       cfg = get_one_free_irq_cfg(cpu);
+       cfg = get_one_free_irq_cfg(node);
 
        if (!cfg)
                return;
@@ -332,7 +305,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
        memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+       init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,35 +329,16 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
                old_desc->chip_data = NULL;
        }
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-
-       if (!cfg->move_in_progress) {
-               /* it means that domain is not changed */
-               if (!cpumask_intersects(desc->affinity, mask))
-                       cfg->move_desc_pending = 1;
-       }
-}
-#endif
+/* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
        return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
        unsigned int index;
        unsigned int unused[3];
@@ -439,26 +393,20 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
        struct irq_pin_list *entry;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       entry = cfg->irq_2_pin;
-       for (;;) {
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                unsigned int reg;
                int pin;
 
-               if (!entry)
-                       break;
                pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-                       spin_unlock_irqrestore(&ioapic_lock, flags);
+                       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                        return true;
                }
-               if (!entry->next)
-                       break;
-               entry = entry->next;
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return false;
 }
@@ -472,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
        union entry_union eu;
        unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
        eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
        return eu.entry;
 }
 
@@ -488,7 +436,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-       union entry_union eu;
+       union entry_union eu = {{0, 0}};
+
        eu.entry = e;
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -497,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
        unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __ioapic_write_entry(apic, pin, e);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -512,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin)
        unsigned long flags;
        union entry_union eu = { .entry.mask = 1 };
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -523,81 +472,95 @@ static void ioapic_mask_entry(int apic, int pin)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-       struct irq_pin_list *entry;
-
-       entry = cfg->irq_2_pin;
-       if (!entry) {
-               entry = get_one_free_irq_2_pin(cpu);
-               if (!entry) {
-                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
-                                       apic, pin);
-                       return;
-               }
-               cfg->irq_2_pin = entry;
-               entry->apic = apic;
-               entry->pin = pin;
-               return;
-       }
+       struct irq_pin_list **last, *entry;
 
-       while (entry->next) {
-               /* not again, please */
+       /* don't allow duplicates */
+       last = &cfg->irq_2_pin;
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                if (entry->apic == apic && entry->pin == pin)
-                       return;
-
-               entry = entry->next;
+                       return 0;
+               last = &entry->next;
        }
 
-       entry->next = get_one_free_irq_2_pin(cpu);
-       entry = entry->next;
+       entry = get_one_free_irq_2_pin(node);
+       if (!entry) {
+               printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+                               node, apic, pin);
+               return -ENOMEM;
+       }
        entry->apic = apic;
        entry->pin = pin;
+
+       *last = entry;
+       return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+       if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+               panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 }
 
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
-                                     int oldapic, int oldpin,
-                                     int newapic, int newpin)
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+                                          int oldapic, int oldpin,
+                                          int newapic, int newpin)
 {
-       struct irq_pin_list *entry = cfg->irq_2_pin;
-       int replaced = 0;
+       struct irq_pin_list *entry;
 
-       while (entry) {
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                if (entry->apic == oldapic && entry->pin == oldpin) {
                        entry->apic = newapic;
                        entry->pin = newpin;
-                       replaced = 1;
                        /* every one is different, right? */
-                       break;
+                       return;
                }
-               entry = entry->next;
        }
 
-       /* why? call replace before add? */
-       if (!replaced)
-               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+       /* old apic/pin didn't exist, so just add new ones */
+       add_pin_to_irq_node(cfg, node, newapic, newpin);
+}
+
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+                                int mask_and, int mask_or,
+                                void (*final)(struct irq_pin_list *entry))
+{
+       unsigned int reg, pin;
+
+       pin = entry->pin;
+       reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+       reg &= mask_and;
+       reg |= mask_or;
+       io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+       if (final)
+               final(entry);
 }
 
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-                               int mask_and, int mask_or,
-                               void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+                              int mask_and, int mask_or,
+                              void (*final)(struct irq_pin_list *entry))
 {
-       int pin;
        struct irq_pin_list *entry;
 
-       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
-               unsigned int reg;
-               pin = entry->pin;
-               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-               reg &= mask_and;
-               reg |= mask_or;
-               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-               if (final)
-                       final(entry);
-       }
+       for_each_irq_pin(entry, cfg->irq_2_pin)
+               __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+       __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                            IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+       __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+                            IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
 static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -605,7 +568,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
        io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-#ifdef CONFIG_X86_64
 static void io_apic_sync(struct irq_pin_list *entry)
 {
        /*
@@ -621,24 +583,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
        io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-                       IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
@@ -647,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 
        BUG_ON(!cfg);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __mask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -657,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
        struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void mask_IO_APIC_irq(unsigned int irq)
@@ -736,7 +680,6 @@ static int __init ioapic_pirq_setup(char *str)
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
        int apic;
@@ -834,20 +777,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
        return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-       struct IO_APIC_route_entry **ioapic_entries)
-
-{
-       /*
-        * for now plain restore of previous settings.
-        * TBD: In the case of OS enabling interrupt-remapping,
-        * IO-APIC RTE's need to be setup to point to interrupt-remapping
-        * table entries. for now, do a plain restore, and wait for
-        * the setup_IO_APIC_irqs() to do proper initialization.
-        */
-       restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
        int apic;
@@ -857,7 +786,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 
        kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -918,61 +846,13 @@ static int __init find_isa_irq_apic(int irq, int type)
        return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-                           mp_irqs[i].dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
  */
 static int EISA_ELCR(unsigned int irq)
 {
-       if (irq < NR_IRQS_LEGACY) {
+       if (irq < nr_legacy_irqs) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
@@ -1184,17 +1064,75 @@ static int pin_2_irq(int idx, int apic, int pin)
        return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG,
+                   "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+                   bus, slot, pin);
+       if (test_bit(bus, mp_bus_not_pci)) {
+               apic_printk(APIC_VERBOSE,
+                           "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
+                               break;
+
+               if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].srcbusirq & 3)) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               return irq;
+                       }
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               best_guess = irq;
+                       }
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
        /* Used to the online set of cpus does not change
         * during assign_irq_vector.
         */
-       spin_lock(&vector_lock);
+       raw_spin_lock(&vector_lock);
 }
 
 void unlock_vector_lock(void)
 {
-       spin_unlock(&vector_lock);
+       raw_spin_unlock(&vector_lock);
 }
 
 static int
@@ -1211,12 +1149,13 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+       static int current_offset = VECTOR_OFFSET_START % 8;
        unsigned int old_vector;
        int cpu, err;
        cpumask_var_t tmp_mask;
 
-       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+       if (cfg->move_in_progress)
                return -EBUSY;
 
        if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1247,7 +1186,7 @@ next:
                if (vector >= first_system_vector) {
                        /* If out of vectors on large boxen, must share them. */
                        offset = (offset + 1) % 8;
-                       vector = FIRST_DEVICE_VECTOR + offset;
+                       vector = FIRST_EXTERNAL_VECTOR + offset;
                }
                if (unlikely(current_vector == vector))
                        continue;
@@ -1276,15 +1215,14 @@ next:
        return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
        int err;
        unsigned long flags;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        err = __assign_irq_vector(irq, cfg, mask);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
        return err;
 }
 
@@ -1318,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 void __setup_vector_irq(int cpu)
 {
        /* Initialize vector_irq on a new cpu */
-       /* This function must be called with vector_lock held */
        int irq, vector;
        struct irq_cfg *cfg;
        struct irq_desc *desc;
 
+       /*
+        * vector_lock will make sure that we don't run into irq vector
+        * assignments that might be happening on another cpu in parallel,
+        * while we setup our initial vector to irq mappings.
+        */
+       raw_spin_lock(&vector_lock);
        /* Mark the inuse vectors */
        for_each_irq_desc(irq, desc) {
                cfg = desc->chip_data;
@@ -1341,6 +1284,7 @@ void __setup_vector_irq(int cpu)
                if (!cpumask_test_cpu(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
+       raw_spin_unlock(&vector_lock);
 }
 
 static struct irq_chip ioapic_chip;
@@ -1445,6 +1389,9 @@ int setup_ioapic_entry(int apic_id, int irq,
                irte.vector = vector;
                irte.dest_id = IRTE_DEST(destination);
 
+               /* Set source-id of interrupt request */
+               set_ioapic_sid(&irte, apic_id);
+
                modify_irte(irq, &irte);
 
                ir_entry->index2 = (index >> 15) & 0x1;
@@ -1487,6 +1434,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 
        cfg = desc->chip_data;
 
+       /*
+        * For legacy irqs, cfg->domain starts with cpu 0 for legacy
+        * controllers like 8259. Now that IO-APIC can handle this irq, update
+        * the cfg->domain.
+        */
+       if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
+               apic->vector_allocation_domain(0, cfg->domain);
+
        if (assign_irq_vector(irq, cfg, apic->target_cpus()))
                return;
 
@@ -1508,64 +1463,72 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
        }
 
        ioapic_register_intr(irq, desc, trigger);
-       if (irq < NR_IRQS_LEGACY)
+       if (irq < nr_legacy_irqs)
                disable_8259A_irq(irq);
 
        ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
        int apic_id, pin, idx, irq;
        int notcon = 0;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-                       idx = find_irq_entry(apic_id, pin, mp_INT);
-                       if (idx == -1) {
-                               if (!notcon) {
-                                       notcon = 1;
-                                       apic_printk(APIC_VERBOSE,
-                                               KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               } else
-                                       apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               continue;
-                       }
-                       if (notcon) {
+       for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+       for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+               idx = find_irq_entry(apic_id, pin, mp_INT);
+               if (idx == -1) {
+                       if (!notcon) {
+                               notcon = 1;
                                apic_printk(APIC_VERBOSE,
-                                       " (apicid-pin) not connected\n");
-                               notcon = 0;
-                       }
+                                       KERN_DEBUG " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       } else
+                               apic_printk(APIC_VERBOSE, " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       continue;
+               }
+               if (notcon) {
+                       apic_printk(APIC_VERBOSE,
+                               " (apicid-pin) not connected\n");
+                       notcon = 0;
+               }
 
-                       irq = pin_2_irq(idx, apic_id, pin);
+               irq = pin_2_irq(idx, apic_id, pin);
 
-                       /*
-                        * Skip the timer IRQ if there's a quirk handler
-                        * installed and if it returns 1:
-                        */
-                       if (apic->multi_timer_check &&
-                                       apic->multi_timer_check(apic_id, irq))
-                               continue;
+               if ((apic_id > 0) && (irq > 16))
+                       continue;
 
-                       desc = irq_to_desc_alloc_cpu(irq, cpu);
-                       if (!desc) {
-                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-                               continue;
-                       }
-                       cfg = desc->chip_data;
-                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+               /*
+                * Skip the timer IRQ if there's a quirk handler
+                * installed and if it returns 1:
+                */
+               if (apic->multi_timer_check &&
+                               apic->multi_timer_check(apic_id, irq))
+                       continue;
 
-                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
-                                       irq_trigger(idx), irq_polarity(idx));
+               desc = irq_to_desc_alloc_node(irq, node);
+               if (!desc) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                       continue;
                }
+               cfg = desc->chip_data;
+               add_pin_to_irq_node(cfg, node, apic_id, pin);
+               /*
+                * don't mark it in pin_programmed, so later acpi could
+                * set it correctly when irq < 16
+                */
+               setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                               irq_trigger(idx), irq_polarity(idx));
        }
 
        if (notcon)
@@ -1574,6 +1537,56 @@ static void __init setup_IO_APIC_irqs(void)
 }
 
 /*
+ * for the gsit that is not in first ioapic
+ * but could not use acpi_register_gsi()
+ * like some special sci in IBM x3330
+ */
+void setup_IO_APIC_irq_extra(u32 gsi)
+{
+       int apic_id = 0, pin, idx, irq;
+       int node = cpu_to_node(boot_cpu_id);
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+
+       /*
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       apic_id = mp_find_ioapic(gsi);
+       if (apic_id < 0)
+               return;
+
+       pin = mp_find_ioapic_pin(apic_id, gsi);
+       idx = find_irq_entry(apic_id, pin, mp_INT);
+       if (idx == -1)
+               return;
+
+       irq = pin_2_irq(idx, apic_id, pin);
+#ifdef CONFIG_SPARSE_IRQ
+       desc = irq_to_desc(irq);
+       if (desc)
+               return;
+#endif
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+               return;
+       }
+
+       cfg = desc->chip_data;
+       add_pin_to_irq_node(cfg, node, apic_id, pin);
+
+       if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[apic_id].apicid, pin);
+               return;
+       }
+       set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+
+       setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                       irq_trigger(idx), irq_polarity(idx));
+}
+
+/*
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
 static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1623,9 +1636,6 @@ __apicdebuginit(void) print_IO_APIC(void)
        struct irq_desc *desc;
        unsigned int irq;
 
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1639,14 +1649,14 @@ __apicdebuginit(void) print_IO_APIC(void)
 
        for (apic = 0; apic < nr_ioapics; apic++) {
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(apic, 0);
        reg_01.raw = io_apic_read(apic, 1);
        if (reg_01.bits.version >= 0x10)
                reg_02.raw = io_apic_read(apic, 2);
        if (reg_01.bits.version >= 0x20)
                reg_03.raw = io_apic_read(apic, 3);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1718,12 +1728,8 @@ __apicdebuginit(void) print_IO_APIC(void)
                if (!entry)
                        continue;
                printk(KERN_DEBUG "IRQ%d ", irq);
-               for (;;) {
+               for_each_irq_pin(entry, cfg->irq_2_pin)
                        printk("-> %d:%d", entry->apic, entry->pin);
-                       if (!entry->next)
-                               break;
-                       entry = entry->next;
-               }
                printk("\n");
        }
 
@@ -1732,36 +1738,24 @@ __apicdebuginit(void) print_IO_APIC(void)
        return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-       unsigned int v;
-       int i, j;
+       int i;
 
-       if (apic_verbosity == APIC_QUIET)
-               return;
+       printk(KERN_DEBUG);
 
-       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-       for (i = 0; i < 8; i++) {
-               v = apic_read(base + i*0x10);
-               for (j = 0; j < 32; j++) {
-                       if (v & (1<<j))
-                               printk("1");
-                       else
-                               printk("0");
-               }
-               printk("\n");
-       }
+       for (i = 0; i < 8; i++)
+               printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+       printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-       unsigned int v, ver, maxlvt;
+       unsigned int i, v, ver, maxlvt;
        u64 icr;
 
-       if (apic_verbosity == APIC_QUIET)
-               return;
-
-       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+       printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
@@ -1802,11 +1796,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
        printk(KERN_DEBUG "... APIC ISR field:\n");
-       print_APIC_bitfield(APIC_ISR);
+       print_APIC_field(APIC_ISR);
        printk(KERN_DEBUG "... APIC TMR field:\n");
-       print_APIC_bitfield(APIC_TMR);
+       print_APIC_field(APIC_TMR);
        printk(KERN_DEBUG "... APIC IRR field:\n");
-       print_APIC_bitfield(APIC_IRR);
+       print_APIC_field(APIC_IRR);
 
        if (APIC_INTEGRATED(ver)) {             /* !82489DX */
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
@@ -1843,16 +1837,34 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
        v = apic_read(APIC_TDCR);
        printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-       printk("\n");
-}
 
-__apicdebuginit(void) print_all_local_APICs(void)
+       if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+               v = apic_read(APIC_EFEAT);
+               maxlvt = (v >> 16) & 0xff;
+               printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+               v = apic_read(APIC_ECTRL);
+               printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+               for (i = 0; i < maxlvt; i++) {
+                       v = apic_read(APIC_EILVTn(i));
+                       printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+               }
+       }
+       printk("\n");
+}
+
+__apicdebuginit(void) print_local_APICs(int maxcpu)
 {
        int cpu;
 
+       if (!maxcpu)
+               return;
+
        preempt_disable();
-       for_each_online_cpu(cpu)
+       for_each_online_cpu(cpu) {
+               if (cpu >= maxcpu)
+                       break;
                smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+       }
        preempt_enable();
 }
 
@@ -1861,12 +1873,12 @@ __apicdebuginit(void) print_PIC(void)
        unsigned int v;
        unsigned long flags;
 
-       if (apic_verbosity == APIC_QUIET)
+       if (!nr_legacy_irqs)
                return;
 
        printk(KERN_DEBUG "\nprinting PIC contents\n");
 
-       spin_lock_irqsave(&i8259A_lock, flags);
+       raw_spin_lock_irqsave(&i8259A_lock, flags);
 
        v = inb(0xa1) << 8 | inb(0x21);
        printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
@@ -1880,7 +1892,7 @@ __apicdebuginit(void) print_PIC(void)
        outb(0x0a,0xa0);
        outb(0x0a,0x20);
 
-       spin_unlock_irqrestore(&i8259A_lock, flags);
+       raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 
        printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 
@@ -1888,16 +1900,41 @@ __apicdebuginit(void) print_PIC(void)
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
 {
+       int num = -1;
+
+       if (strcmp(arg, "all") == 0) {
+               show_lapic = CONFIG_NR_CPUS;
+       } else {
+               get_option(&arg, &num);
+               if (num >= 0)
+                       show_lapic = num;
+       }
+
+       return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
+{
+       if (apic_verbosity == APIC_QUIET)
+               return 0;
+
        print_PIC();
-       print_all_local_APICs();
+
+       /* don't print out if apic is not there */
+       if (!cpu_has_apic && !apic_from_smp_config())
+               return 0;
+
+       print_local_APICs(show_lapic);
        print_IO_APIC();
 
        return 0;
 }
 
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
 
 
 /* Where if anywhere is the i8259 connect in external int mode */
@@ -1914,11 +1951,15 @@ void __init enable_IO_APIC(void)
         * The number of IO-APIC IRQ registers (== #pins):
         */
        for (apic = 0; apic < nr_ioapics; apic++) {
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                reg_01.raw = io_apic_read(apic, 1);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                nr_ioapic_registers[apic] = reg_01.bits.entries+1;
        }
+
+       if (!nr_legacy_irqs)
+               return;
+
        for(apic = 0; apic < nr_ioapics; apic++) {
                int pin;
                /* See if any of the pins is in ExtINT mode */
@@ -1973,6 +2014,9 @@ void disable_IO_APIC(void)
         */
        clear_IO_APIC();
 
+       if (!nr_legacy_irqs)
+               return;
+
        /*
         * If the i8259 is routed through an IOAPIC
         * Put that IOAPIC in virtual wire mode
@@ -2006,7 +2050,9 @@ void disable_IO_APIC(void)
        /*
         * Use virtual wire A mode when interrupt remapping is enabled.
         */
-       disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+       if (cpu_has_apic || apic_from_smp_config())
+               disconnect_bsp_APIC(!intr_remapping_enabled &&
+                               ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2017,7 +2063,7 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-static void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc(void)
 {
        union IO_APIC_reg_00 reg_00;
        physid_mask_t phys_id_present_map;
@@ -2026,9 +2072,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
        unsigned char old_id;
        unsigned long flags;
 
-       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+       if (acpi_ioapic)
                return;
-
        /*
         * Don't check I/O APIC IDs for xAPIC systems.  They have
         * no meaning without the serial APIC bus.
@@ -2040,7 +2085,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
         * This is broken; anything with a real cpu count has to
         * circumvent this idiocy regardless.
         */
-       phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+       apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
 
        /*
         * Set the IOAPIC ID to the value stored in the MPC table.
@@ -2048,9 +2093,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
        for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
 
                /* Read the register 0 value */
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                old_id = mp_ioapics[apic_id].apicid;
 
@@ -2067,7 +2112,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
                 * system must have a unique ID or we get lots of nice
                 * 'stuck on smp_invalidate_needed IPI wait' messages.
                 */
-               if (apic->check_apicid_used(phys_id_present_map,
+               if (apic->check_apicid_used(&phys_id_present_map,
                                        mp_ioapics[apic_id].apicid)) {
                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
                                apic_id, mp_ioapics[apic_id].apicid);
@@ -2082,7 +2127,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
                        mp_ioapics[apic_id].apicid = i;
                } else {
                        physid_mask_t tmp;
-                       tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+                       apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
                        apic_printk(APIC_VERBOSE, "Setting %d in the "
                                        "phys_id_present_map\n",
                                        mp_ioapics[apic_id].apicid);
@@ -2109,16 +2154,16 @@ static void __init setup_ioapic_ids_from_mpc(void)
                        mp_ioapics[apic_id].apicid);
 
                reg_00.bits.ID = mp_ioapics[apic_id].apicid;
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(apic_id, 0, reg_00.raw);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                /*
                 * Sanity check
                 */
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
                        printk("could not set ID!\n");
                else
@@ -2201,40 +2246,31 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
        unsigned long flags;
        struct irq_cfg *cfg;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < NR_IRQS_LEGACY) {
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < nr_legacy_irqs) {
                disable_8259A_irq(irq);
                if (i8259A_irq_pending(irq))
                        was_pending = 1;
        }
        cfg = irq_cfg(irq);
        __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return was_pending;
 }
 
-#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
 
        struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
 
        return 1;
 }
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-       apic->send_IPI_self(irq_cfg(irq)->vector);
-
-       return 1;
-}
-#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2246,40 +2282,31 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
        cpumask_var_t cleanup_mask;
 
        if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
                unsigned int i;
-               cfg->move_cleanup_count = 0;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       cfg->move_cleanup_count++;
                for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
                        apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
        } else {
                cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
                apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
                free_cpumask_var(cleanup_mask);
        }
        cfg->move_in_progress = 0;
 }
 
-static void
-__target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
        int apic, pin;
        struct irq_pin_list *entry;
        u8 vector = cfg->vector;
 
-       entry = cfg->irq_2_pin;
-       for (;;) {
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
                unsigned int reg;
 
-               if (!entry)
-                       break;
-
                apic = entry->apic;
                pin = entry->pin;
                /*
@@ -2292,68 +2319,67 @@ __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
                io_apic_modify(apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = entry->next;
        }
 }
 
 /*
  * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
  * leaves desc->affinity untouched.
  */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+                 unsigned int *dest_id)
 {
        struct irq_cfg *cfg;
        unsigned int irq;
 
        if (!cpumask_intersects(mask, cpu_online_mask))
-               return BAD_APICID;
+               return -1;
 
        irq = desc->irq;
        cfg = desc->chip_data;
        if (assign_irq_vector(irq, cfg, mask))
-               return BAD_APICID;
-
-       /* check that before desc->addinity get updated */
-       set_extra_move_desc(desc, mask);
+               return -1;
 
        cpumask_copy(desc->affinity, mask);
 
-       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+       *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+       return 0;
 }
 
-static void
+static int
 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
        struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        unsigned int irq;
+       int ret = -1;
 
        irq = desc->irq;
        cfg = desc->chip_data;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       dest = set_desc_affinity(desc, mask);
-       if (dest != BAD_APICID) {
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
+       ret = set_desc_affinity(desc, mask, &dest);
+       if (!ret) {
                /* Only the high 8 bits are valid. */
                dest = SET_APIC_LOGICAL_ID(dest);
                __target_IO_APIC_irq(irq, dest, cfg);
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return ret;
 }
 
-static void
+static int
 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc;
 
        desc = irq_to_desc(irq);
 
-       set_ioapic_affinity_irq_desc(desc, mask);
+       return set_ioapic_affinity_irq_desc(desc, mask);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2369,26 +2395,25 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
        struct irq_cfg *cfg;
        struct irte irte;
        unsigned int dest;
        unsigned int irq;
+       int ret = -1;
 
        if (!cpumask_intersects(mask, cpu_online_mask))
-               return;
+               return ret;
 
        irq = desc->irq;
        if (get_irte(irq, &irte))
-               return;
+               return ret;
 
        cfg = desc->chip_data;
        if (assign_irq_vector(irq, cfg, mask))
-               return;
-
-       set_extra_move_desc(desc, mask);
+               return ret;
 
        dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2404,27 +2429,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
                send_cleanup_vector(cfg);
 
        cpumask_copy(desc->affinity, mask);
+
+       return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                            const struct cpumask *mask)
 {
-       migrate_ioapic_irq_desc(desc, mask);
+       return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
                                       const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       set_ir_ioapic_affinity_irq_desc(desc, mask);
+       return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                                   const struct cpumask *mask)
 {
+       return 0;
 }
 #endif
 
@@ -2452,8 +2480,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                        continue;
 
                cfg = irq_cfg(irq);
-               spin_lock(&desc->lock);
-               if (!cfg->move_cleanup_count)
+               raw_spin_lock(&desc->lock);
+
+               /*
+                * Check if the irq migration is in progress. If so, we
+                * haven't received the cleanup request yet for this irq.
+                */
+               if (cfg->move_in_progress)
                        goto unlock;
 
                if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
@@ -2472,73 +2505,95 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __get_cpu_var(vector_irq)[vector] = -1;
-               cfg->move_cleanup_count--;
 unlock:
-               spin_unlock(&desc->lock);
+               raw_spin_unlock(&desc->lock);
        }
 
        irq_exit();
 }
 
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
 {
        struct irq_desc *desc = *descp;
        struct irq_cfg *cfg = desc->chip_data;
-       unsigned vector, me;
-
-       if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               if (likely(!cfg->move_desc_pending))
-                       return;
+       unsigned me;
 
-               /* domain has not changed, but affinity did */
-               me = smp_processor_id();
-               if (cpumask_test_cpu(me, desc->affinity)) {
-                       *descp = desc = move_irq_desc(desc, me);
-                       /* get the new one */
-                       cfg = desc->chip_data;
-                       cfg->move_desc_pending = 0;
-               }
-#endif
+       if (likely(!cfg->move_in_progress))
                return;
-       }
 
-       vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
 
-       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               *descp = desc = move_irq_desc(desc, me);
-               /* get the new one */
-               cfg = desc->chip_data;
-#endif
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                send_cleanup_vector(cfg);
-       }
+}
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+       __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
+
+       __irq_complete_move(&desc, cfg->vector);
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void ack_apic_edge(unsigned int irq)
 {
-       int apic, pin;
-       struct irq_pin_list *entry;
+       struct irq_desc *desc = irq_to_desc(irq);
 
-       entry = cfg->irq_2_pin;
-       for (;;) {
+       irq_complete_move(&desc);
+       move_native_irq(irq);
+       ack_APIC_irq();
+}
 
-               if (!entry)
-                       break;
+atomic_t irq_mis_count;
 
-               apic = entry->apic;
-               pin = entry->pin;
-               io_apic_eoi(apic, pin);
-               entry = entry->next;
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry;
+
+       for_each_irq_pin(entry, cfg->irq_2_pin) {
+               if (mp_ioapics[entry->apic].apicver >= 0x20) {
+                       /*
+                        * Intr-remapping uses pin number as the virtual vector
+                        * in the RTE. Actual vector is programmed in
+                        * intr-remapping table entry. Hence for the io-apic
+                        * EOI we use the pin number.
+                        */
+                       if (irq_remapped(irq))
+                               io_apic_eoi(entry->apic, entry->pin);
+                       else
+                               io_apic_eoi(entry->apic, cfg->vector);
+               } else {
+                       __mask_and_edge_IO_APIC_irq(entry);
+                       __unmask_and_level_IO_APIC_irq(entry);
+               }
        }
 }
 
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
+static void eoi_ioapic_irq(struct irq_desc *desc)
 {
        struct irq_cfg *cfg;
        unsigned long flags;
@@ -2547,44 +2602,16 @@ eoi_ioapic_irq(struct irq_desc *desc)
        irq = desc->irq;
        cfg = desc->chip_data;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __eoi_ioapic_irq(irq, cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       ack_x2APIC_irq();
-       eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-       ack_x2APIC_irq();
-}
-#endif
-
-static void ack_apic_edge(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       irq_complete_move(&desc);
-       move_native_irq(irq);
-       ack_APIC_irq();
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-atomic_t irq_mis_count;
-
 static void ack_apic_level(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
        unsigned long v;
        int i;
-#endif
        struct irq_cfg *cfg;
        int do_unmask_irq = 0;
 
@@ -2597,31 +2624,41 @@ static void ack_apic_level(unsigned int irq)
        }
 #endif
 
-#ifdef CONFIG_X86_32
        /*
-       * It appears there is an erratum which affects at least version 0x11
-       * of I/O APIC (that's the 82093AA and cores integrated into various
-       * chipsets).  Under certain conditions a level-triggered interrupt is
-       * erroneously delivered as edge-triggered one but the respective IRR
-       * bit gets set nevertheless.  As a result the I/O unit expects an EOI
-       * message but it will never arrive and further interrupts are blocked
-       * from the source.  The exact reason is so far unknown, but the
-       * phenomenon was observed when two consecutive interrupt requests
-       * from a given source get delivered to the same CPU and the source is
-       * temporarily disabled in between.
-       *
-       * A workaround is to simulate an EOI message manually.  We achieve it
-       * by setting the trigger mode to edge and then to level when the edge
-       * trigger mode gets detected in the TMR of a local APIC for a
-       * level-triggered interrupt.  We mask the source for the time of the
-       * operation to prevent an edge-triggered interrupt escaping meanwhile.
-       * The idea is from Manfred Spraul.  --macro
-       */
+        * It appears there is an erratum which affects at least version 0x11
+        * of I/O APIC (that's the 82093AA and cores integrated into various
+        * chipsets).  Under certain conditions a level-triggered interrupt is
+        * erroneously delivered as edge-triggered one but the respective IRR
+        * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+        * message but it will never arrive and further interrupts are blocked
+        * from the source.  The exact reason is so far unknown, but the
+        * phenomenon was observed when two consecutive interrupt requests
+        * from a given source get delivered to the same CPU and the source is
+        * temporarily disabled in between.
+        *
+        * A workaround is to simulate an EOI message manually.  We achieve it
+        * by setting the trigger mode to edge and then to level when the edge
+        * trigger mode gets detected in the TMR of a local APIC for a
+        * level-triggered interrupt.  We mask the source for the time of the
+        * operation to prevent an edge-triggered interrupt escaping meanwhile.
+        * The idea is from Manfred Spraul.  --macro
+        *
+        * Also in the case when cpu goes offline, fixup_irqs() will forward
+        * any unhandled interrupt on the offlined cpu to the new cpu
+        * destination that is handling the corresponding interrupt. This
+        * interrupt forwarding is done via IPI's. Hence, in this case also
+        * level-triggered io-apic interrupt will be seen as an edge
+        * interrupt in the IRR. And we can't rely on the cpu's EOI
+        * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+        * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+        * supporting EOI register, we do an explicit EOI to clear the
+        * remote IRR and on IO-APIC's which don't have an EOI register,
+        * we use the above logic (mask+edge followed by unmask+level) from
+        * Manfred Spraul to clear the remote IRR.
+        */
        cfg = desc->chip_data;
        i = cfg->vector;
-
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
 
        /*
         * We must acknowledge the irq before we move it or the acknowledge will
@@ -2629,8 +2666,18 @@ static void ack_apic_level(unsigned int irq)
         */
        ack_APIC_irq();
 
-       if (irq_remapped(irq))
+       /*
+        * Tail end of clearing remote IRR bit (either by delivering the EOI
+        * message via io-apic EOI register write or simulating it using
+        * mask+edge followed by unnask+level logic) manually when the
+        * level triggered interrupt is seen as the edge triggered interrupt
+        * at the cpu.
+        */
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+
                eoi_ioapic_irq(desc);
+       }
 
        /* Now we can move and renable the irq */
        if (unlikely(do_unmask_irq)) {
@@ -2665,35 +2712,20 @@ static void ack_apic_level(unsigned int irq)
                        move_masked_irq(irq);
                unmask_IO_APIC_irq_desc(desc);
        }
-
-#ifdef CONFIG_X86_32
-       if (!(v & (1 << (i & 0x1f)))) {
-               atomic_inc(&irq_mis_count);
-               spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(cfg);
-               __unmask_and_level_IO_APIC_irq(cfg);
-               spin_unlock(&ioapic_lock);
-       }
-#endif
 }
 
 #ifdef CONFIG_INTR_REMAP
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+       ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       ack_APIC_irq();
+       eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2750,7 +2782,7 @@ static inline void init_IO_APIC_traps(void)
                         * so default to an old-fashioned 8259
                         * interrupt if we can..
                         */
-                       if (irq < NR_IRQS_LEGACY)
+                       if (irq < nr_legacy_irqs)
                                make_8259A_irq(irq);
                        else
                                /* Strange. Oh, well.. */
@@ -2898,7 +2930,7 @@ static inline void __init check_timer(void)
 {
        struct irq_desc *desc = irq_to_desc(0);
        struct irq_cfg *cfg = desc->chip_data;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -2964,7 +2996,7 @@ static inline void __init check_timer(void)
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+                       add_pin_to_irq_node(cfg, node, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                } else {
                        /* for edge trigger, setup_IO_APIC_irq already
@@ -3001,7 +3033,7 @@ static inline void __init check_timer(void)
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
@@ -3086,7 +3118,7 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS       (1 << PIC_CASCADE_IR)
+#define PIC_IRQS       (1UL << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
@@ -3094,21 +3126,19 @@ void __init setup_IO_APIC(void)
        /*
         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
         */
-
-       io_apic_irqs = ~PIC_IRQS;
+       io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
 
        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
        /*
          * Set up IO-APIC IRQ routing.
          */
-#ifdef CONFIG_X86_32
-       if (!acpi_ioapic)
-               setup_ioapic_ids_from_mpc();
-#endif
+       x86_init.mpparse.setup_ioapic_ids();
+
        sync_Arb_IDs();
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
-       check_timer();
+       if (nr_legacy_irqs)
+               check_timer();
 }
 
 /*
@@ -3156,13 +3186,13 @@ static int ioapic_resume(struct sys_device *dev)
        data = container_of(dev, struct sysfs_ioapic_data, dev);
        entry = data->entry;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(dev->id, 0);
        if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
                reg_00.bits.ID = mp_ioapics[dev->id].apicid;
                io_apic_write(dev->id, 0, reg_00.raw);
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
        for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
                ioapic_write_entry(dev->id, i, entry[i]);
 
@@ -3209,27 +3239,25 @@ static int __init ioapic_init_sysfs(void)
 
 device_initcall(ioapic_init_sysfs);
 
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
        /* Allocate an unused irq */
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
        struct irq_cfg *cfg_new = NULL;
-       int cpu = boot_cpu_id;
        struct irq_desc *desc_new = NULL;
 
        irq = 0;
        if (irq_want < nr_irqs_gsi)
                irq_want = nr_irqs_gsi;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        for (new = irq_want; new < nr_irqs; new++) {
-               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               desc_new = irq_to_desc_alloc_node(new, node);
                if (!desc_new) {
                        printk(KERN_INFO "can not get irq_desc for %d\n", new);
                        continue;
@@ -3238,28 +3266,30 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
                if (cfg_new->vector != 0)
                        continue;
+
+               desc_new = move_irq_desc(desc_new, node);
+               cfg_new = desc_new->chip_data;
+
                if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
                        irq = new;
                break;
        }
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (irq > 0)
+               dynamic_irq_init_keep_chip_data(irq);
 
-       if (irq > 0) {
-               dynamic_irq_init(irq);
-               /* restore it, in case dynamic_irq_init clear it */
-               if (desc_new)
-                       desc_new->chip_data = cfg_new;
-       }
        return irq;
 }
 
 int create_irq(void)
 {
+       int node = cpu_to_node(boot_cpu_id);
        unsigned int irq_want;
        int irq;
 
        irq_want = nr_irqs_gsi;
-       irq = create_irq_nr(irq_want);
+       irq = create_irq_nr(irq_want, node);
 
        if (irq == 0)
                irq = -1;
@@ -3270,28 +3300,21 @@ int create_irq(void)
 void destroy_irq(unsigned int irq)
 {
        unsigned long flags;
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
 
-       /* store it, in case dynamic_irq_cleanup clear it */
-       desc = irq_to_desc(irq);
-       cfg = desc->chip_data;
-       dynamic_irq_cleanup(irq);
-       /* connect back irq_cfg */
-       if (desc)
-               desc->chip_data = cfg;
+       dynamic_irq_cleanup_keep_chip_data(irq);
 
        free_irte(irq);
-       spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq, cfg);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
+       __clear_irq_vector(irq, get_irq_chip_data(irq));
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 /*
  * MSI message composition
  */
 #ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+                          struct msi_msg *msg, u8 hpet_id)
 {
        struct irq_cfg *cfg;
        int err;
@@ -3324,6 +3347,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
                irte.vector = cfg->vector;
                irte.dest_id = IRTE_DEST(dest);
 
+               /* Set source-id of interrupt request */
+               if (pdev)
+                       set_msi_sid(&irte, pdev);
+               else
+                       set_hpet_sid(&irte, hpet_id);
+
                modify_irte(irq, &irte);
 
                msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3361,16 +3390,15 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
+       if (set_desc_affinity(desc, mask, &dest))
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3382,13 +3410,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg_desc(desc, &msg);
+
+       return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -3397,11 +3427,10 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        struct irte irte;
 
        if (get_irte(irq, &irte))
-               return;
+               return -1;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
+       if (set_desc_affinity(desc, mask, &dest))
+               return -1;
 
        irte.vector = cfg->vector;
        irte.dest_id = IRTE_DEST(dest);
@@ -3418,6 +3447,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
         */
        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);
+
+       return 0;
 }
 
 #endif
@@ -3483,7 +3514,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
        int ret;
        struct msi_msg msg;
 
-       ret = msi_compose_msg(dev, irq, &msg);
+       ret = msi_compose_msg(dev, irq, &msg, -1);
        if (ret < 0)
                return ret;
 
@@ -3513,15 +3544,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        unsigned int irq_want;
        struct intel_iommu *iommu = NULL;
        int index = 0;
+       int node;
 
        /* x86 doesn't support multiple MSI yet */
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1;
 
+       node = dev_to_node(&dev->dev);
        irq_want = nr_irqs_gsi;
        sub_handle = 0;
        list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want);
+               irq = create_irq_nr(irq_want, node);
                if (irq == 0)
                        return -1;
                irq_want = irq + 1;
@@ -3571,16 +3604,15 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
+       if (set_desc_affinity(desc, mask, &dest))
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3592,11 +3624,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
        .name = "DMAR_MSI",
        .unmask = dmar_msi_unmask,
        .mask = dmar_msi_mask,
@@ -3612,7 +3646,7 @@ int arch_setup_dmar_msi(unsigned int irq)
        int ret;
        struct msi_msg msg;
 
-       ret = msi_compose_msg(NULL, irq, &msg);
+       ret = msi_compose_msg(NULL, irq, &msg, -1);
        if (ret < 0)
                return ret;
        dmar_msi_write(irq, &msg);
@@ -3625,16 +3659,15 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
+       if (set_desc_affinity(desc, mask, &dest))
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3646,10 +3679,25 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        hpet_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
 
+static struct irq_chip ir_hpet_msi_type = {
+       .name = "IR-HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+       .ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
 static struct irq_chip hpet_msi_type = {
        .name = "HPET_MSI",
        .unmask = hpet_msi_unmask,
@@ -3661,18 +3709,36 @@ static struct irq_chip hpet_msi_type = {
        .retrigger = ioapic_retrigger_irq,
 };
 
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
        int ret;
        struct msi_msg msg;
+       struct irq_desc *desc = irq_to_desc(irq);
 
-       ret = msi_compose_msg(NULL, irq, &msg);
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_hpet_to_ir(id);
+               int index;
+
+               if (!iommu)
+                       return -1;
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       return -1;
+       }
+
+       ret = msi_compose_msg(NULL, irq, &msg, id);
        if (ret < 0)
                return ret;
 
        hpet_msi_write(irq, &msg);
-       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
-               "edge");
+       desc->status |= IRQ_MOVE_PCNTXT;
+       if (irq_remapped(irq))
+               set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+                                             handle_edge_irq, "edge");
+       else
+               set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+                                             handle_edge_irq, "edge");
 
        return 0;
 }
@@ -3700,19 +3766,20 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
        write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
-               return;
+       if (set_desc_affinity(desc, mask, &dest))
+               return -1;
 
        cfg = desc->chip_data;
 
        target_ht_irq(irq, dest, cfg->vector);
+
+       return 0;
 }
 
 #endif
@@ -3771,80 +3838,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-                      unsigned long mmr_offset)
-{
-       const struct cpumask *eligible_cpu = cpumask_of(cpu);
-       struct irq_cfg *cfg;
-       int mmr_pnode;
-       unsigned long mmr_value;
-       struct uv_IO_APIC_route_entry *entry;
-       unsigned long flags;
-       int err;
-
-       cfg = irq_cfg(irq);
-
-       err = assign_irq_vector(irq, cfg, eligible_cpu);
-       if (err != 0)
-               return err;
-
-       spin_lock_irqsave(&vector_lock, flags);
-       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-                                     irq_name);
-       spin_unlock_irqrestore(&vector_lock, flags);
-
-       mmr_value = 0;
-       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->vector = cfg->vector;
-       entry->delivery_mode = apic->irq_delivery_mode;
-       entry->dest_mode = apic->irq_dest_mode;
-       entry->polarity = 0;
-       entry->trigger = 0;
-       entry->mask = 0;
-       entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
-       mmr_pnode = uv_blade_to_pnode(mmr_blade);
-       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-       return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
-       unsigned long mmr_value;
-       struct uv_IO_APIC_route_entry *entry;
-       int mmr_pnode;
-
-       mmr_value = 0;
-       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->mask = 1;
-
-       mmr_pnode = uv_blade_to_pnode(mmr_blade);
-       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
        union IO_APIC_reg_01    reg_01;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return reg_01.bits.entries;
 }
@@ -3893,11 +3894,97 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int node;
+       int ioapic, pin;
+       int trigger, polarity;
 
-#ifdef CONFIG_ACPI
+       ioapic = irq_attr->ioapic;
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       if (dev)
+               node = dev_to_node(dev);
+       else
+               node = cpu_to_node(boot_cpu_id);
+
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
+       pin = irq_attr->ioapic_pin;
+       trigger = irq_attr->trigger;
+       polarity = irq_attr->polarity;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= nr_legacy_irqs) {
+               cfg = desc->chip_data;
+               if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+                       printk(KERN_INFO "can not add pin %d for irq %d\n",
+                               pin, irq);
+                       return 0;
+               }
+       }
+
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+       return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int ioapic, pin;
+       /*
+        * Avoid pin reprogramming.  PRTs typically include entries
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       ioapic = irq_attr->ioapic;
+       pin = irq_attr->ioapic_pin;
+       if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[ioapic].apicid, pin);
+               return 0;
+       }
+       set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+       return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
+u8 __init io_apic_unique_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+           !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return io_apic_get_unique_id(nr_ioapics, id);
+       else
+               return id;
+#else
+       int i;
+       DECLARE_BITMAP(used, 256);
+
+       bitmap_zero(used, 256);
+       for (i = 0; i < nr_ioapics; i++) {
+               struct mpc_ioapic *ia = &mp_ioapics[i];
+               __set_bit(ia->apicid, used);
+       }
+       if (!test_bit(id, used))
+               return id;
+       return find_first_zero_bit(used, 256);
+#endif
+}
 
 #ifdef CONFIG_X86_32
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -3918,11 +4005,11 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
         */
 
        if (physids_empty(apic_id_map))
-               apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+               apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(ioapic, 0);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        if (apic_id >= get_physical_broadcast()) {
                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -3934,10 +4021,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
         * Every APIC in a system must have a unique ID or we get lots of nice
         * 'stuck on smp_invalidate_needed IPI wait' messages.
         */
-       if (apic->check_apicid_used(apic_id_map, apic_id)) {
+       if (apic->check_apicid_used(&apic_id_map, apic_id)) {
 
                for (i = 0; i < get_physical_broadcast(); i++) {
-                       if (!apic->check_apicid_used(apic_id_map, i))
+                       if (!apic->check_apicid_used(&apic_id_map, i))
                                break;
                }
 
@@ -3950,16 +4037,16 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
                apic_id = i;
        }
 
-       tmp = apic->apicid_to_cpu_present(apic_id);
+       apic->apicid_to_cpu_present(apic_id, &tmp);
        physids_or(apic_id_map, apic_id_map, tmp);
 
        if (reg_00.bits.ID != apic_id) {
                reg_00.bits.ID = apic_id;
 
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(ioapic, 0, reg_00.raw);
                reg_00.raw = io_apic_read(ioapic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                /* Sanity check */
                if (reg_00.bits.ID != apic_id) {
@@ -3973,51 +4060,19 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
        return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
        union IO_APIC_reg_01    reg_01;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       if (!IO_APIC_IRQ(irq)) {
-               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
-       if (!desc) {
-               printk(KERN_INFO "can not get irq_desc %d\n", irq);
-               return 0;
-       }
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= NR_IRQS_LEGACY) {
-               cfg = desc->chip_data;
-               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-       }
-
-       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-       return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4038,8 +4093,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
        return 0;
 }
 
-#endif /* CONFIG_ACPI */
-
 /*
  * This function currently is only a helper for the i386 smp boot process where
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -4050,49 +4103,38 @@ void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
        struct irq_desc *desc;
-       struct irq_cfg *cfg;
        const struct cpumask *mask;
 
        if (skip_ioapic_setup == 1)
                return;
 
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-
-                       /* setup_IO_APIC_irqs could fail to get vector for some device
-                        * when you have too many devices, because at that time only boot
-                        * cpu is online.
-                        */
-                       desc = irq_to_desc(irq);
-                       cfg = desc->chip_data;
-                       if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
-                                                 irq_trigger(irq_entry),
-                                                 irq_polarity(irq_entry));
-                               continue;
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
+       for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+               irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+               if (irq_entry == -1)
+                       continue;
+               irq = pin_2_irq(irq_entry, ioapic, pin);
 
-                       }
+               if ((ioapic > 0) && (irq > 16))
+                       continue;
 
-                       /*
-                        * Honour affinities which have been set in early boot
-                        */
-                       if (desc->status &
-                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
-                       else
-                               mask = apic->target_cpus();
+               desc = irq_to_desc(irq);
 
-                       if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq_desc(desc, mask);
-                       else
-                               set_ioapic_affinity_irq_desc(desc, mask);
-               }
+               /*
+                * Honour affinities which have been set in early boot
+                */
+               if (desc->status &
+                   (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                       mask = desc->affinity;
+               else
+                       mask = apic->target_cpus();
 
+               if (intr_remapping_enabled)
+                       set_ir_ioapic_affinity_irq_desc(desc, mask);
+               else
+                       set_ioapic_affinity_irq_desc(desc, mask);
        }
+
 }
 #endif
 
@@ -4100,7 +4142,7 @@ void __init setup_ioapic_dest(void)
 
 static struct resource *ioapic_resources;
 
-static struct resource * __init ioapic_setup_resources(void)
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 {
        unsigned long n;
        struct resource *res;
@@ -4116,15 +4158,13 @@ static struct resource * __init ioapic_setup_resources(void)
        mem = alloc_bootmem(n);
        res = (void *)mem;
 
-       if (mem != NULL) {
-               mem += sizeof(struct resource) * nr_ioapics;
+       mem += sizeof(struct resource) * nr_ioapics;
 
-               for (i = 0; i < nr_ioapics; i++) {
-                       res[i].name = mem;
-                       res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-                       sprintf(mem,  "IOAPIC %u", i);
-                       mem += IOAPIC_RESOURCE_NAME_SIZE;
-               }
+       for (i = 0; i < nr_ioapics; i++) {
+               res[i].name = mem;
+               res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
+               mem += IOAPIC_RESOURCE_NAME_SIZE;
        }
 
        ioapic_resources = res;
@@ -4138,7 +4178,7 @@ void __init ioapic_init_mappings(void)
        struct resource *ioapic_res;
        int i;
 
-       ioapic_res = ioapic_setup_resources();
+       ioapic_res = ioapic_setup_resources(nr_ioapics);
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
                        ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4157,46 +4197,108 @@ void __init ioapic_init_mappings(void)
 #ifdef CONFIG_X86_32
 fake_ioapic_page:
 #endif
-                       ioapic_phys = (unsigned long)
-                               alloc_bootmem_pages(PAGE_SIZE);
+                       ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
                        ioapic_phys = __pa(ioapic_phys);
                }
                set_fixmap_nocache(idx, ioapic_phys);
-               apic_printk(APIC_VERBOSE,
-                           "mapped IOAPIC to %08lx (%08lx)\n",
-                           __fix_to_virt(idx), ioapic_phys);
+               apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+                       __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+                       ioapic_phys);
                idx++;
 
-               if (ioapic_res != NULL) {
-                       ioapic_res->start = ioapic_phys;
-                       ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-                       ioapic_res++;
-               }
+               ioapic_res->start = ioapic_phys;
+               ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
+               ioapic_res++;
        }
 }
 
-static int __init ioapic_insert_resources(void)
+void __init ioapic_insert_resources(void)
 {
        int i;
        struct resource *r = ioapic_resources;
 
        if (!r) {
-               if (nr_ioapics > 0) {
+               if (nr_ioapics > 0)
                        printk(KERN_ERR
                                "IO APIC resources couldn't be allocated.\n");
-                       return -1;
-               }
-               return 0;
+               return;
        }
 
        for (i = 0; i < nr_ioapics; i++) {
                insert_resource(&iomem_resource, r);
                r++;
        }
+}
+
+int mp_find_ioapic(int gsi)
+{
+       int i = 0;
+
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+               if ((gsi >= mp_gsi_routing[i].gsi_base)
+                   && (gsi <= mp_gsi_routing[i].gsi_end))
+                       return i;
+       }
 
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+       return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+       if (WARN_ON(ioapic == -1))
+               return -1;
+       if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
+               return -1;
+
+       return gsi - mp_gsi_routing[ioapic].gsi_base;
+}
+
+static int bad_ioapic(unsigned long address)
+{
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+                      "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+               return 1;
+       }
+       if (!address) {
+               printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+                      " found in table, skipping!\n");
+               return 1;
+       }
        return 0;
 }
 
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+       int idx = 0;
+
+       if (bad_ioapic(address))
+               return;
+
+       idx = nr_ioapics;
+
+       mp_ioapics[idx].type = MP_IOAPIC;
+       mp_ioapics[idx].flags = MPC_APIC_USABLE;
+       mp_ioapics[idx].apicaddr = address;
+
+       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+       mp_ioapics[idx].apicid = io_apic_unique_id(id);
+       mp_ioapics[idx].apicver = io_apic_get_version(idx);
+
+       /*
+        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+        */
+       mp_gsi_routing[idx].gsi_base = gsi_base;
+       mp_gsi_routing[idx].gsi_end = gsi_base +
+           io_apic_get_redir_entries(idx);
+
+       printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+              "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+              mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
+              mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
+
+       nr_ioapics++;
+}