x86, ioapic: Optimize pin_2_irq
[safe/jmp/linux-2.6] / arch / x86 / kernel / apic / io_apic.c
index 20ea839..0d35f46 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>     /* time_after() */
+#include <linux/slab.h>
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -73,8 +74,8 @@
  */
 int sis_apic_bug = -1;
 
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -88,14 +89,15 @@ int nr_ioapics;
 /* IO APIC gsi routing info */
 struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];
 
+/* The last gsi number used */
+u32 gsi_end;
+
 /* MP IRQ source entries */
 struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
-/* Number of legacy interrupts */
-static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
 /* GSI interrupts */
 static int nr_irqs_gsi = NR_IRQS_LEGACY;
 
@@ -140,33 +142,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[] = {
+static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
 #else
-static struct irq_cfg irq_cfgx[NR_IRQS] = {
+static struct irq_cfg irq_cfgx[NR_IRQS];
 #endif
-       [0]  = { .vector = IRQ0_VECTOR,  },
-       [1]  = { .vector = IRQ1_VECTOR,  },
-       [2]  = { .vector = IRQ2_VECTOR,  },
-       [3]  = { .vector = IRQ3_VECTOR,  },
-       [4]  = { .vector = IRQ4_VECTOR,  },
-       [5]  = { .vector = IRQ5_VECTOR,  },
-       [6]  = { .vector = IRQ6_VECTOR,  },
-       [7]  = { .vector = IRQ7_VECTOR,  },
-       [8]  = { .vector = IRQ8_VECTOR,  },
-       [9]  = { .vector = IRQ9_VECTOR,  },
-       [10] = { .vector = IRQ10_VECTOR, },
-       [11] = { .vector = IRQ11_VECTOR, },
-       [12] = { .vector = IRQ12_VECTOR, },
-       [13] = { .vector = IRQ13_VECTOR, },
-       [14] = { .vector = IRQ14_VECTOR, },
-       [15] = { .vector = IRQ15_VECTOR, },
-};
-
-void __init io_apic_disable_legacy(void)
-{
-       nr_legacy_irqs = 0;
-       nr_irqs_gsi = 0;
-}
 
 int __init arch_early_irq_init(void)
 {
@@ -176,6 +155,11 @@ int __init arch_early_irq_init(void)
        int node;
        int i;
 
+       if (!legacy_pic->nr_legacy_irqs) {
+               nr_irqs_gsi = 0;
+               io_apic_irqs = ~0UL;
+       }
+
        cfg = irq_cfgx;
        count = ARRAY_SIZE(irq_cfgx);
        node= cpu_to_node(boot_cpu_id);
@@ -185,8 +169,14 @@ int __init arch_early_irq_init(void)
                desc->chip_data = &cfg[i];
                zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
                zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
-               if (i < nr_legacy_irqs)
-                       cpumask_setall(cfg[i].domain);
+               /*
+                * For legacy IRQ's, start with assigning irq0 to irq15 to
+                * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+                */
+               if (i < legacy_pic->nr_legacy_irqs) {
+                       cfg[i].vector = IRQ0_VECTOR + i;
+                       cpumask_set_cpu(0, cfg[i].domain);
+               }
        }
 
        return 0;
@@ -406,7 +396,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
        struct irq_pin_list *entry;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        for_each_irq_pin(entry, cfg->irq_2_pin) {
                unsigned int reg;
                int pin;
@@ -415,11 +405,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-                       spin_unlock_irqrestore(&ioapic_lock, flags);
+                       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                        return true;
                }
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return false;
 }
@@ -433,10 +423,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
        union entry_union eu;
        unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
        eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
        return eu.entry;
 }
 
@@ -459,9 +449,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
        unsigned long flags;
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __ioapic_write_entry(apic, pin, e);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -474,10 +464,10 @@ static void ioapic_mask_entry(int apic, int pin)
        unsigned long flags;
        union entry_union eu = { .entry.mask = 1 };
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -539,23 +529,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
        add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+                                int mask_and, int mask_or,
+                                void (*final)(struct irq_pin_list *entry))
+{
+       unsigned int reg, pin;
+
+       pin = entry->pin;
+       reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+       reg &= mask_and;
+       reg |= mask_or;
+       io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+       if (final)
+               final(entry);
+}
+
 static void io_apic_modify_irq(struct irq_cfg *cfg,
                               int mask_and, int mask_or,
                               void (*final)(struct irq_pin_list *entry))
 {
-       int pin;
        struct irq_pin_list *entry;
 
-       for_each_irq_pin(entry, cfg->irq_2_pin) {
-               unsigned int reg;
-               pin = entry->pin;
-               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-               reg &= mask_and;
-               reg |= mask_or;
-               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-               if (final)
-                       final(entry);
-       }
+       for_each_irq_pin(entry, cfg->irq_2_pin)
+               __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+       __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                            IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+       __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+                            IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
 static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -579,18 +587,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
        io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-                       IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
        struct irq_cfg *cfg = desc->chip_data;
@@ -598,9 +594,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 
        BUG_ON(!cfg);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __mask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -608,9 +604,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
        struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void mask_IO_APIC_irq(unsigned int irq)
@@ -859,7 +855,7 @@ static int __init find_isa_irq_apic(int irq, int type)
  */
 static int EISA_ELCR(unsigned int irq)
 {
-       if (irq < nr_legacy_irqs) {
+       if (irq < legacy_pic->nr_legacy_irqs) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
@@ -1023,7 +1019,7 @@ static inline int irq_trigger(int idx)
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
-       int irq, i;
+       int irq;
        int bus = mp_irqs[idx].srcbus;
 
        /*
@@ -1035,18 +1031,13 @@ static int pin_2_irq(int idx, int apic, int pin)
        if (test_bit(bus, mp_bus_not_pci)) {
                irq = mp_irqs[idx].srcbusirq;
        } else {
-               /*
-                * PCI IRQs are mapped in order
-                */
-               i = irq = 0;
-               while (i < apic)
-                       irq += nr_ioapic_registers[i++];
-               irq += pin;
+               u32 gsi = mp_gsi_routing[apic].gsi_base + pin;
                /*
                  * For MPS mode, so far only needed by ES7000 platform
                  */
                if (ioapic_renumber_irq)
-                       irq = ioapic_renumber_irq(apic, irq);
+                       gsi = ioapic_renumber_irq(apic, gsi);
+               irq = gsi;
        }
 
 #ifdef CONFIG_X86_32
@@ -1134,12 +1125,12 @@ void lock_vector_lock(void)
        /* Used to the online set of cpus does not change
         * during assign_irq_vector.
         */
-       spin_lock(&vector_lock);
+       raw_spin_lock(&vector_lock);
 }
 
 void unlock_vector_lock(void)
 {
-       spin_unlock(&vector_lock);
+       raw_spin_unlock(&vector_lock);
 }
 
 static int
@@ -1156,7 +1147,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+       static int current_offset = VECTOR_OFFSET_START % 8;
        unsigned int old_vector;
        int cpu, err;
        cpumask_var_t tmp_mask;
@@ -1192,7 +1184,7 @@ next:
                if (vector >= first_system_vector) {
                        /* If out of vectors on large boxen, must share them. */
                        offset = (offset + 1) % 8;
-                       vector = FIRST_DEVICE_VECTOR + offset;
+                       vector = FIRST_EXTERNAL_VECTOR + offset;
                }
                if (unlikely(current_vector == vector))
                        continue;
@@ -1226,9 +1218,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
        int err;
        unsigned long flags;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        err = __assign_irq_vector(irq, cfg, mask);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
        return err;
 }
 
@@ -1262,14 +1254,27 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 void __setup_vector_irq(int cpu)
 {
        /* Initialize vector_irq on a new cpu */
-       /* This function must be called with vector_lock held */
        int irq, vector;
        struct irq_cfg *cfg;
        struct irq_desc *desc;
 
+       /*
+        * vector_lock will make sure that we don't run into irq vector
+        * assignments that might be happening on another cpu in parallel,
+        * while we setup our initial vector to irq mappings.
+        */
+       raw_spin_lock(&vector_lock);
        /* Mark the inuse vectors */
        for_each_irq_desc(irq, desc) {
                cfg = desc->chip_data;
+
+               /*
+                * If it is a legacy IRQ handled by the legacy PIC, this cpu
+                * will be part of the irq_cfg's domain.
+                */
+               if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
+                       cpumask_set_cpu(cpu, cfg->domain);
+
                if (!cpumask_test_cpu(cpu, cfg->domain))
                        continue;
                vector = cfg->vector;
@@ -1285,6 +1290,7 @@ void __setup_vector_irq(int cpu)
                if (!cpumask_test_cpu(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
+       raw_spin_unlock(&vector_lock);
 }
 
 static struct irq_chip ioapic_chip;
@@ -1434,6 +1440,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 
        cfg = desc->chip_data;
 
+       /*
+        * For legacy irqs, cfg->domain starts with cpu 0 for legacy
+        * controllers like 8259. Now that IO-APIC can handle this irq, update
+        * the cfg->domain.
+        */
+       if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
+               apic->vector_allocation_domain(0, cfg->domain);
+
        if (assign_irq_vector(irq, cfg, apic->target_cpus()))
                return;
 
@@ -1455,8 +1469,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
        }
 
        ioapic_register_intr(irq, desc, trigger);
-       if (irq < nr_legacy_irqs)
-               disable_8259A_irq(irq);
+       if (irq < legacy_pic->nr_legacy_irqs)
+               legacy_pic->chip->mask(irq);
 
        ioapic_write_entry(apic_id, pin, entry);
 }
@@ -1467,7 +1481,7 @@ static struct {
 
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic_id = 0, pin, idx, irq;
+       int apic_id, pin, idx, irq;
        int notcon = 0;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
@@ -1475,14 +1489,7 @@ static void __init setup_IO_APIC_irqs(void)
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-#ifdef CONFIG_ACPI
-       if (!acpi_disabled && acpi_ioapic) {
-               apic_id = mp_find_ioapic(0);
-               if (apic_id < 0)
-                       apic_id = 0;
-       }
-#endif
-
+       for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
        for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
                idx = find_irq_entry(apic_id, pin, mp_INT);
                if (idx == -1) {
@@ -1504,6 +1511,9 @@ static void __init setup_IO_APIC_irqs(void)
 
                irq = pin_2_irq(idx, apic_id, pin);
 
+               if ((apic_id > 0) && (irq > 16))
+                       continue;
+
                /*
                 * Skip the timer IRQ if there's a quirk handler
                 * installed and if it returns 1:
@@ -1533,6 +1543,56 @@ static void __init setup_IO_APIC_irqs(void)
 }
 
 /*
+ * for a gsi that is not in the first ioapic
+ * but could not use acpi_register_gsi(),
+ * like some special sci in IBM x3330
+ */
+void setup_IO_APIC_irq_extra(u32 gsi)
+{
+       int apic_id = 0, pin, idx, irq;
+       int node = cpu_to_node(boot_cpu_id);
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+
+       /*
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       apic_id = mp_find_ioapic(gsi);
+       if (apic_id < 0)
+               return;
+
+       pin = mp_find_ioapic_pin(apic_id, gsi);
+       idx = find_irq_entry(apic_id, pin, mp_INT);
+       if (idx == -1)
+               return;
+
+       irq = pin_2_irq(idx, apic_id, pin);
+#ifdef CONFIG_SPARSE_IRQ
+       desc = irq_to_desc(irq);
+       if (desc)
+               return;
+#endif
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+               return;
+       }
+
+       cfg = desc->chip_data;
+       add_pin_to_irq_node(cfg, node, apic_id, pin);
+
+       if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[apic_id].apicid, pin);
+               return;
+       }
+       set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+
+       setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                       irq_trigger(idx), irq_polarity(idx));
+}
+
+/*
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
 static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1595,14 +1655,14 @@ __apicdebuginit(void) print_IO_APIC(void)
 
        for (apic = 0; apic < nr_ioapics; apic++) {
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(apic, 0);
        reg_01.raw = io_apic_read(apic, 1);
        if (reg_01.bits.version >= 0x10)
                reg_02.raw = io_apic_read(apic, 2);
        if (reg_01.bits.version >= 0x20)
                reg_03.raw = io_apic_read(apic, 3);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1641,7 +1701,7 @@ __apicdebuginit(void) print_IO_APIC(void)
        printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
        printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
-                         " Stat Dmod Deli Vect:   \n");
+                         " Stat Dmod Deli Vect:\n");
 
        for (i = 0; i <= reg_01.bits.entries; i++) {
                struct IO_APIC_route_entry entry;
@@ -1819,12 +1879,12 @@ __apicdebuginit(void) print_PIC(void)
        unsigned int v;
        unsigned long flags;
 
-       if (!nr_legacy_irqs)
+       if (!legacy_pic->nr_legacy_irqs)
                return;
 
        printk(KERN_DEBUG "\nprinting PIC contents\n");
 
-       spin_lock_irqsave(&i8259A_lock, flags);
+       raw_spin_lock_irqsave(&i8259A_lock, flags);
 
        v = inb(0xa1) << 8 | inb(0x21);
        printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
@@ -1838,7 +1898,7 @@ __apicdebuginit(void) print_PIC(void)
        outb(0x0a,0xa0);
        outb(0x0a,0x20);
 
-       spin_unlock_irqrestore(&i8259A_lock, flags);
+       raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 
        printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
 
@@ -1888,22 +1948,10 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 void __init enable_IO_APIC(void)
 {
-       union IO_APIC_reg_01 reg_01;
        int i8259_apic, i8259_pin;
        int apic;
-       unsigned long flags;
-
-       /*
-        * The number of IO-APIC IRQ registers (== #pins):
-        */
-       for (apic = 0; apic < nr_ioapics; apic++) {
-               spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(apic, 1);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-       }
 
-       if (!nr_legacy_irqs)
+       if (!legacy_pic->nr_legacy_irqs)
                return;
 
        for(apic = 0; apic < nr_ioapics; apic++) {
@@ -1960,7 +2008,7 @@ void disable_IO_APIC(void)
         */
        clear_IO_APIC();
 
-       if (!nr_legacy_irqs)
+       if (!legacy_pic->nr_legacy_irqs)
                return;
 
        /*
@@ -2039,9 +2087,9 @@ void __init setup_ioapic_ids_from_mpc(void)
        for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
 
                /* Read the register 0 value */
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                old_id = mp_ioapics[apic_id].apicid;
 
@@ -2100,16 +2148,16 @@ void __init setup_ioapic_ids_from_mpc(void)
                        mp_ioapics[apic_id].apicid);
 
                reg_00.bits.ID = mp_ioapics[apic_id].apicid;
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(apic_id, 0, reg_00.raw);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                /*
                 * Sanity check
                 */
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                reg_00.raw = io_apic_read(apic_id, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
                        printk("could not set ID!\n");
                else
@@ -2192,15 +2240,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
        unsigned long flags;
        struct irq_cfg *cfg;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < nr_legacy_irqs) {
-               disable_8259A_irq(irq);
-               if (i8259A_irq_pending(irq))
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < legacy_pic->nr_legacy_irqs) {
+               legacy_pic->chip->mask(irq);
+               if (legacy_pic->irq_pending(irq))
                        was_pending = 1;
        }
        cfg = irq_cfg(irq);
        __unmask_IO_APIC_irq(cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return was_pending;
 }
@@ -2211,9 +2259,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
        struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
 
        return 1;
 }
@@ -2270,26 +2318,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 
 /*
  * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
  * leaves desc->affinity untouched.
  */
 unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+                 unsigned int *dest_id)
 {
        struct irq_cfg *cfg;
        unsigned int irq;
 
        if (!cpumask_intersects(mask, cpu_online_mask))
-               return BAD_APICID;
+               return -1;
 
        irq = desc->irq;
        cfg = desc->chip_data;
        if (assign_irq_vector(irq, cfg, mask))
-               return BAD_APICID;
+               return -1;
 
        cpumask_copy(desc->affinity, mask);
 
-       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+       *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+       return 0;
 }
 
 static int
@@ -2304,15 +2354,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
        irq = desc->irq;
        cfg = desc->chip_data;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       dest = set_desc_affinity(desc, mask);
-       if (dest != BAD_APICID) {
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
+       ret = set_desc_affinity(desc, mask, &dest);
+       if (!ret) {
                /* Only the high 8 bits are valid. */
                dest = SET_APIC_LOGICAL_ID(dest);
                __target_IO_APIC_irq(irq, dest, cfg);
-               ret = 0;
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return ret;
 }
@@ -2425,7 +2474,14 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                        continue;
 
                cfg = irq_cfg(irq);
-               spin_lock(&desc->lock);
+               raw_spin_lock(&desc->lock);
+
+               /*
+                * Check if the irq migration is in progress. If so, we
+                * haven't received the cleanup request yet for this irq.
+                */
+               if (cfg->move_in_progress)
+                       goto unlock;
 
                if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                        goto unlock;
@@ -2444,7 +2500,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                }
                __get_cpu_var(vector_irq)[vector] = -1;
 unlock:
-               spin_unlock(&desc->lock);
+               raw_spin_unlock(&desc->lock);
        }
 
        irq_exit();
@@ -2492,17 +2548,42 @@ static void ack_apic_edge(unsigned int irq)
 
 atomic_t irq_mis_count;
 
-static int use_eoi_reg __read_mostly;
-
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) document the io-apic
+ * version as 0x2. This is an error in the documentation; these ICH chips
+ * use io-apics of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
        struct irq_pin_list *entry;
 
        for_each_irq_pin(entry, cfg->irq_2_pin) {
-               if (irq_remapped(irq))
-                       io_apic_eoi(entry->apic, entry->pin);
-               else
-                       io_apic_eoi(entry->apic, cfg->vector);
+               if (mp_ioapics[entry->apic].apicver >= 0x20) {
+                       /*
+                        * Intr-remapping uses pin number as the virtual vector
+                        * in the RTE. Actual vector is programmed in
+                        * intr-remapping table entry. Hence for the io-apic
+                        * EOI we use the pin number.
+                        */
+                       if (irq_remapped(irq))
+                               io_apic_eoi(entry->apic, entry->pin);
+                       else
+                               io_apic_eoi(entry->apic, cfg->vector);
+               } else {
+                       __mask_and_edge_IO_APIC_irq(entry);
+                       __unmask_and_level_IO_APIC_irq(entry);
+               }
        }
 }
 
@@ -2515,28 +2596,11 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
        irq = desc->irq;
        cfg = desc->chip_data;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        __eoi_ioapic_irq(irq, cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static int ioapic_supports_eoi(void)
-{
-       struct pci_dev *root;
-
-       root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
-       if (root && root->vendor == PCI_VENDOR_ID_INTEL &&
-           mp_ioapics[0].apicver >= 0x2) {
-               use_eoi_reg = 1;
-               printk(KERN_INFO "IO-APIC supports EOI register\n");
-       } else
-               printk(KERN_INFO "IO-APIC doesn't support EOI\n");
-
-       return 0;
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-fs_initcall(ioapic_supports_eoi);
-
 static void ack_apic_level(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -2572,6 +2636,19 @@ static void ack_apic_level(unsigned int irq)
         * level-triggered interrupt.  We mask the source for the time of the
         * operation to prevent an edge-triggered interrupt escaping meanwhile.
         * The idea is from Manfred Spraul.  --macro
+        *
+        * Also in the case when cpu goes offline, fixup_irqs() will forward
+        * any unhandled interrupt on the offlined cpu to the new cpu
+        * destination that is handling the corresponding interrupt. This
+        * interrupt forwarding is done via IPIs. Hence, in this case too, a
+        * level-triggered io-apic interrupt will be seen as an edge
+        * interrupt in the IRR. And we can't rely on the cpu's EOI
+        * being broadcast to the IO-APICs, which would clear the remote IRR
+        * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+        * supporting EOI register, we do an explicit EOI to clear the
+        * remote IRR and on IO-APIC's which don't have an EOI register,
+        * we use the above logic (mask+edge followed by unmask+level) from
+        * Manfred Spraul to clear the remote IRR.
         */
        cfg = desc->chip_data;
        i = cfg->vector;
@@ -2583,6 +2660,19 @@ static void ack_apic_level(unsigned int irq)
         */
        ack_APIC_irq();
 
+       /*
+        * Tail end of clearing remote IRR bit (either by delivering the EOI
+        * message via io-apic EOI register write or simulating it using
+        * mask+edge followed by unmask+level logic) manually when the
+        * level triggered interrupt is seen as the edge triggered interrupt
+        * at the cpu.
+        */
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+
+               eoi_ioapic_irq(desc);
+       }
+
        /* Now we can move and renable the irq */
        if (unlikely(do_unmask_irq)) {
                /* Only migrate the irq if the ack has been received.
@@ -2616,20 +2706,6 @@ static void ack_apic_level(unsigned int irq)
                        move_masked_irq(irq);
                unmask_IO_APIC_irq_desc(desc);
        }
-
-       /* Tail end of version 0x11 I/O APIC bug workaround */
-       if (!(v & (1 << (i & 0x1f)))) {
-               atomic_inc(&irq_mis_count);
-
-               if (use_eoi_reg)
-                       eoi_ioapic_irq(desc);
-               else {
-                       spin_lock(&ioapic_lock);
-                       __mask_and_edge_IO_APIC_irq(cfg);
-                       __unmask_and_level_IO_APIC_irq(cfg);
-                       spin_unlock(&ioapic_lock);
-               }
-       }
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2700,8 +2776,8 @@ static inline void init_IO_APIC_traps(void)
                         * so default to an old-fashioned 8259
                         * interrupt if we can..
                         */
-                       if (irq < nr_legacy_irqs)
-                               make_8259A_irq(irq);
+                       if (irq < legacy_pic->nr_legacy_irqs)
+                               legacy_pic->make_irq(irq);
                        else
                                /* Strange. Oh, well.. */
                                desc->chip = &no_irq_chip;
@@ -2858,7 +2934,7 @@ static inline void __init check_timer(void)
        /*
         * get/set the timer IRQ vector:
         */
-       disable_8259A_irq(0);
+       legacy_pic->chip->mask(0);
        assign_irq_vector(0, cfg, apic->target_cpus());
 
        /*
@@ -2871,7 +2947,7 @@ static inline void __init check_timer(void)
         * automatically.
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-       init_8259A(1);
+       legacy_pic->init(1);
 #ifdef CONFIG_X86_32
        {
                unsigned int ver;
@@ -2930,7 +3006,7 @@ static inline void __init check_timer(void)
                if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
-                               enable_8259A_irq(0);
+                               legacy_pic->chip->unmask(0);
                        }
                        if (disable_timer_pin_1 > 0)
                                clear_IO_APIC_pin(0, pin1);
@@ -2953,14 +3029,14 @@ static inline void __init check_timer(void)
                 */
                replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               enable_8259A_irq(0);
+               legacy_pic->chip->unmask(0);
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
                        timer_through_8259 = 1;
                        if (nmi_watchdog == NMI_IO_APIC) {
-                               disable_8259A_irq(0);
+                               legacy_pic->chip->mask(0);
                                setup_nmi();
-                               enable_8259A_irq(0);
+                               legacy_pic->chip->unmask(0);
                        }
                        goto out;
                }
@@ -2968,7 +3044,7 @@ static inline void __init check_timer(void)
                 * Cleanup, just in case ...
                 */
                local_irq_disable();
-               disable_8259A_irq(0);
+               legacy_pic->chip->mask(0);
                clear_IO_APIC_pin(apic2, pin2);
                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
        }
@@ -2987,22 +3063,22 @@ static inline void __init check_timer(void)
 
        lapic_register_intr(0, desc);
        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
-       enable_8259A_irq(0);
+       legacy_pic->chip->unmask(0);
 
        if (timer_irq_works()) {
                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
                goto out;
        }
        local_irq_disable();
-       disable_8259A_irq(0);
+       legacy_pic->chip->mask(0);
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
        apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
 
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as ExtINT IRQ...\n");
 
-       init_8259A(0);
-       make_8259A_irq(0);
+       legacy_pic->init(0);
+       legacy_pic->make_irq(0);
        apic_write(APIC_LVT0, APIC_DM_EXTINT);
 
        unlock_ExtINT_logic();
@@ -3044,7 +3120,7 @@ void __init setup_IO_APIC(void)
        /*
         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
         */
-       io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
+       io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
 
        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
        /*
@@ -3055,7 +3131,7 @@ void __init setup_IO_APIC(void)
        sync_Arb_IDs();
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
-       if (nr_legacy_irqs)
+       if (legacy_pic->nr_legacy_irqs)
                check_timer();
 }
 
@@ -3104,13 +3180,13 @@ static int ioapic_resume(struct sys_device *dev)
        data = container_of(dev, struct sysfs_ioapic_data, dev);
        entry = data->entry;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(dev->id, 0);
        if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
                reg_00.bits.ID = mp_ioapics[dev->id].apicid;
                io_apic_write(dev->id, 0, reg_00.raw);
        }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
        for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
                ioapic_write_entry(dev->id, i, entry[i]);
 
@@ -3173,7 +3249,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
        if (irq_want < nr_irqs_gsi)
                irq_want = nr_irqs_gsi;
 
-       spin_lock_irqsave(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
        for (new = irq_want; new < nr_irqs; new++) {
                desc_new = irq_to_desc_alloc_node(new, node);
                if (!desc_new) {
@@ -3186,19 +3262,17 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
                        continue;
 
                desc_new = move_irq_desc(desc_new, node);
+               cfg_new = desc_new->chip_data;
 
                if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
                        irq = new;
                break;
        }
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (irq > 0)
+               dynamic_irq_init_keep_chip_data(irq);
 
-       if (irq > 0) {
-               dynamic_irq_init(irq);
-               /* restore it, in case dynamic_irq_init clear it */
-               if (desc_new)
-                       desc_new->chip_data = cfg_new;
-       }
        return irq;
 }
 
@@ -3220,27 +3294,21 @@ int create_irq(void)
 void destroy_irq(unsigned int irq)
 {
        unsigned long flags;
-       struct irq_cfg *cfg;
-       struct irq_desc *desc;
 
-       /* store it, in case dynamic_irq_cleanup clear it */
-       desc = irq_to_desc(irq);
-       cfg = desc->chip_data;
-       dynamic_irq_cleanup(irq);
-       /* connect back irq_cfg */
-       desc->chip_data = cfg;
+       dynamic_irq_cleanup_keep_chip_data(irq);
 
        free_irte(irq);
-       spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq, cfg);
-       spin_unlock_irqrestore(&vector_lock, flags);
+       raw_spin_lock_irqsave(&vector_lock, flags);
+       __clear_irq_vector(irq, get_irq_chip_data(irq));
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 /*
  * MSI message composition
  */
 #ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+                          struct msi_msg *msg, u8 hpet_id)
 {
        struct irq_cfg *cfg;
        int err;
@@ -3274,7 +3342,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
                irte.dest_id = IRTE_DEST(dest);
 
                /* Set source-id of interrupt request */
-               set_msi_sid(&irte, pdev);
+               if (pdev)
+                       set_msi_sid(&irte, pdev);
+               else
+                       set_hpet_sid(&irte, hpet_id);
 
                modify_irte(irq, &irte);
 
@@ -3320,8 +3391,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
+       if (set_desc_affinity(desc, mask, &dest))
                return -1;
 
        cfg = desc->chip_data;
@@ -3353,8 +3423,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        if (get_irte(irq, &irte))
                return -1;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
+       if (set_desc_affinity(desc, mask, &dest))
                return -1;
 
        irte.vector = cfg->vector;
@@ -3439,7 +3508,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
        int ret;
        struct msi_msg msg;
 
-       ret = msi_compose_msg(dev, irq, &msg);
+       ret = msi_compose_msg(dev, irq, &msg, -1);
        if (ret < 0)
                return ret;
 
@@ -3536,8 +3605,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
+       if (set_desc_affinity(desc, mask, &dest))
                return -1;
 
        cfg = desc->chip_data;
@@ -3572,7 +3640,7 @@ int arch_setup_dmar_msi(unsigned int irq)
        int ret;
        struct msi_msg msg;
 
-       ret = msi_compose_msg(NULL, irq, &msg);
+       ret = msi_compose_msg(NULL, irq, &msg, -1);
        if (ret < 0)
                return ret;
        dmar_msi_write(irq, &msg);
@@ -3592,8 +3660,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        struct msi_msg msg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
+       if (set_desc_affinity(desc, mask, &dest))
                return -1;
 
        cfg = desc->chip_data;
@@ -3612,6 +3679,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 #endif /* CONFIG_SMP */
 
+static struct irq_chip ir_hpet_msi_type = {
+       .name = "IR-HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+       .ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
 static struct irq_chip hpet_msi_type = {
        .name = "HPET_MSI",
        .unmask = hpet_msi_unmask,
@@ -3623,20 +3703,36 @@ static struct irq_chip hpet_msi_type = {
        .retrigger = ioapic_retrigger_irq,
 };
 
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
        int ret;
        struct msi_msg msg;
        struct irq_desc *desc = irq_to_desc(irq);
 
-       ret = msi_compose_msg(NULL, irq, &msg);
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_hpet_to_ir(id);
+               int index;
+
+               if (!iommu)
+                       return -1;
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       return -1;
+       }
+
+       ret = msi_compose_msg(NULL, irq, &msg, id);
        if (ret < 0)
                return ret;
 
        hpet_msi_write(irq, &msg);
        desc->status |= IRQ_MOVE_PCNTXT;
-       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
-               "edge");
+       if (irq_remapped(irq))
+               set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+                                             handle_edge_irq, "edge");
+       else
+               set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+                                             handle_edge_irq, "edge");
 
        return 0;
 }
@@ -3670,8 +3766,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
        struct irq_cfg *cfg;
        unsigned int dest;
 
-       dest = set_desc_affinity(desc, mask);
-       if (dest == BAD_APICID)
+       if (set_desc_affinity(desc, mask, &dest))
                return -1;
 
        cfg = desc->chip_data;
@@ -3742,11 +3837,15 @@ int __init io_apic_get_redir_entries (int ioapic)
        union IO_APIC_reg_01    reg_01;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       return reg_01.bits.entries;
+       /* The register returns the maximum redir index
+        * supported, which is one less than the total number of redir
+        * entries.
+        */
+       return reg_01.bits.entries + 1;
 }
 
 void __init probe_nr_irqs_gsi(void)
@@ -3762,7 +3861,7 @@ void __init probe_nr_irqs_gsi(void)
 
                nr = 0;
                for (idx = 0; idx < nr_ioapics; idx++)
-                       nr += io_apic_get_redir_entries(idx) + 1;
+                       nr += io_apic_get_redir_entries(idx);
 
                if (nr > nr_irqs_gsi)
                        nr_irqs_gsi = nr;
@@ -3827,7 +3926,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
-       if (irq >= nr_legacy_irqs) {
+       if (irq >= legacy_pic->nr_legacy_irqs) {
                cfg = desc->chip_data;
                if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
                        printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -3906,9 +4005,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
        if (physids_empty(apic_id_map))
                apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(ioapic, 0);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        if (apic_id >= get_physical_broadcast()) {
                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -3942,10 +4041,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
        if (reg_00.bits.ID != apic_id) {
                reg_00.bits.ID = apic_id;
 
-               spin_lock_irqsave(&ioapic_lock, flags);
+               raw_spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(ioapic, 0, reg_00.raw);
                reg_00.raw = io_apic_read(ioapic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
                /* Sanity check */
                if (reg_00.bits.ID != apic_id) {
@@ -3966,29 +4065,34 @@ int __init io_apic_get_version(int ioapic)
        union IO_APIC_reg_01    reg_01;
        unsigned long flags;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
+       raw_spin_lock_irqsave(&ioapic_lock, flags);
        reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return reg_01.bits.version;
 }
 
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
 {
-       int i;
+       int ioapic, pin, idx;
 
        if (skip_ioapic_setup)
                return -1;
 
-       for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].irqtype == mp_INT &&
-                   mp_irqs[i].srcbusirq == bus_irq)
-                       break;
-       if (i >= mp_irq_entries)
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0)
+               return -1;
+
+       pin = mp_find_ioapic_pin(ioapic, gsi);
+       if (pin < 0)
+               return -1;
+
+       idx = find_irq_entry(ioapic, pin, mp_INT);
+       if (idx < 0)
                return -1;
 
-       *trigger = irq_trigger(i);
-       *polarity = irq_polarity(i);
+       *trigger = irq_trigger(idx);
+       *polarity = irq_polarity(idx);
        return 0;
 }
 
@@ -4000,27 +4104,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-       int pin, ioapic = 0, irq, irq_entry;
+       int pin, ioapic, irq, irq_entry;
        struct irq_desc *desc;
        const struct cpumask *mask;
 
        if (skip_ioapic_setup == 1)
                return;
 
-#ifdef CONFIG_ACPI
-       if (!acpi_disabled && acpi_ioapic) {
-               ioapic = mp_find_ioapic(0);
-               if (ioapic < 0)
-                       ioapic = 0;
-       }
-#endif
-
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
        for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
                irq_entry = find_irq_entry(ioapic, pin, mp_INT);
                if (irq_entry == -1)
                        continue;
                irq = pin_2_irq(irq_entry, ioapic, pin);
 
+               if ((ioapic > 0) && (irq > 16))
+                       continue;
+
                desc = irq_to_desc(irq);
 
                /*
@@ -4100,18 +4200,17 @@ void __init ioapic_init_mappings(void)
 #ifdef CONFIG_X86_32
 fake_ioapic_page:
 #endif
-                       ioapic_phys = (unsigned long)
-                               alloc_bootmem_pages(PAGE_SIZE);
+                       ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
                        ioapic_phys = __pa(ioapic_phys);
                }
                set_fixmap_nocache(idx, ioapic_phys);
-               apic_printk(APIC_VERBOSE,
-                           "mapped IOAPIC to %08lx (%08lx)\n",
-                           __fix_to_virt(idx), ioapic_phys);
+               apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+                       __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+                       ioapic_phys);
                idx++;
 
                ioapic_res->start = ioapic_phys;
-               ioapic_res->end = ioapic_phys + PAGE_SIZE-1;
+               ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
                ioapic_res++;
        }
 }
@@ -4134,7 +4233,7 @@ void __init ioapic_insert_resources(void)
        }
 }
 
-int mp_find_ioapic(int gsi)
+int mp_find_ioapic(u32 gsi)
 {
        int i = 0;
 
@@ -4149,7 +4248,7 @@ int mp_find_ioapic(int gsi)
        return -1;
 }
 
-int mp_find_ioapic_pin(int ioapic, int gsi)
+int mp_find_ioapic_pin(int ioapic, u32 gsi)
 {
        if (WARN_ON(ioapic == -1))
                return -1;
@@ -4177,6 +4276,7 @@ static int bad_ioapic(unsigned long address)
 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 {
        int idx = 0;
+       int entries;
 
        if (bad_ioapic(address))
                return;
@@ -4195,9 +4295,17 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
         * Build basic GSI lookup table to facilitate gsi->io_apic lookups
         * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
         */
+       entries = io_apic_get_redir_entries(idx);
        mp_gsi_routing[idx].gsi_base = gsi_base;
-       mp_gsi_routing[idx].gsi_end = gsi_base +
-           io_apic_get_redir_entries(idx);
+       mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       nr_ioapic_registers[idx] = entries;
+
+       if (mp_gsi_routing[idx].gsi_end > gsi_end)
+               gsi_end = mp_gsi_routing[idx].gsi_end;
 
        printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
               "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
@@ -4206,3 +4314,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
        nr_ioapics++;
 }
+
+/* Enable IOAPIC early just for system timer */
+void __init pre_init_apic_IRQ0(void)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+
+       printk(KERN_INFO "Early APIC setup for system timer0\n");
+#ifndef CONFIG_SMP
+       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+#endif
+       desc = irq_to_desc_alloc_node(0, 0);
+
+       setup_local_APIC();
+
+       cfg = irq_cfg(0);
+       add_pin_to_irq_node(cfg, 0, 0, 0);
+       set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+       setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
+}