Merge ../linux-2.6-x86

[safe/jmp/linux-2.6] / arch / x86 / kernel / io_apic.c
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c

index a466b04..d7f0993 100644 (file)
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -41,6 +41,7 @@
  #endif
  #include <linux/bootmem.h>
  #include <linux/dmar.h>
+#include <linux/hpet.h>
  
  #include <asm/idle.h>
  #include <asm/io.h>
@@ -56,6 +57,9 @@
  #include <asm/hypertransport.h>
  #include <asm/setup.h>
  #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
@@ -72,13 +76,6 @@ int sis_apic_bug = -1;
  static DEFINE_SPINLOCK(ioapic_lock);
  static DEFINE_SPINLOCK(vector_lock);
  
-int first_free_entry;
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-int pin_map_size;
-
  /*
   * # of IRQ routing registers
   */
@@ -110,11 +107,34 @@ static int __init parse_noapic(char *str)
  }
  early_param("noapic", parse_noapic);
  
-struct irq_cfg;
  struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {  /* one (IO-APIC, pin) pair routed to an IRQ; chained from irq_cfg->irq_2_pin */
+       int apic, pin;  /* IO-APIC index and pin number on that IO-APIC */
+       struct irq_pin_list *next;      /* next pair for the same IRQ; NULL terminates the list */
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{      /* Allocate one zeroed irq_pin_list entry on the memory node of @cpu. */
+       struct irq_pin_list *pin;
+       int node;
+
+       node = cpu_to_node(cpu);
+
+       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);     /* may return NULL */
+       printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);  /* printed whether or not the allocation succeeded */
+
+       return pin;     /* NULL on allocation failure; callers must check */
+}
+
  struct irq_cfg {
-       unsigned int irq;
-       struct irq_cfg *next;
         struct irq_pin_list *irq_2_pin;
         cpumask_t domain;
         cpumask_t old_domain;
@@ -124,229 +144,95 @@ struct irq_cfg {
  };
  
  /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg_legacy[] __initdata = {
-       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+#endif
+       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
  };
  
-static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
-/* need to be biger than size of irq_cfg_legacy */
-static int nr_irq_cfg = 32;
-
-static int __init parse_nr_irq_cfg(char *arg)
+void __init arch_early_irq_init(void)
  {
-       if (arg) {
-               nr_irq_cfg = simple_strtoul(arg, NULL, 0);
-               if (nr_irq_cfg < 32)
-                       nr_irq_cfg = 32;
-       }
-       return 0;
-}
-
-early_param("nr_irq_cfg", parse_nr_irq_cfg);
-
-static void init_one_irq_cfg(struct irq_cfg *cfg)
-{
-       memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
-}
-
-static struct irq_cfg *irq_cfgx;
-static struct irq_cfg *irq_cfgx_free;
-static void __init init_work(void *data)
-{
-       struct dyn_array *da = data;
         struct irq_cfg *cfg;
-       int legacy_count;
+       struct irq_desc *desc;
+       int count;
         int i;
  
-       cfg = *da->name;
-
-       memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
-
-       legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
-       for (i = legacy_count; i < *da->nr; i++)
-               init_one_irq_cfg(&cfg[i]);
-
-       for (i = 1; i < *da->nr; i++)
-               cfg[i-1].next = &cfg[i];
-
-       irq_cfgx_free = &irq_cfgx[legacy_count];
-       irq_cfgx[legacy_count - 1].next = NULL;
-}
-
-#define for_each_irq_cfg(cfg)          \
-       for (cfg = irq_cfgx; cfg; cfg = cfg->next)
-
-DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
-{
-       struct irq_cfg *cfg;
-
         cfg = irq_cfgx;
-       while (cfg) {
-               if (cfg->irq == irq)
-                       return cfg;
+       count = ARRAY_SIZE(irq_cfgx);
  
-               cfg = cfg->next;
+       for (i = 0; i < count; i++) {
+               desc = irq_to_desc(i);
+               desc->chip_data = &cfg[i];
         }
-
-       return NULL;
  }
  
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg *irq_cfg(unsigned int irq)
  {
-       struct irq_cfg *cfg, *cfg_pri;
-       int i;
-       int count = 0;
-
-       cfg_pri = cfg = irq_cfgx;
-       while (cfg) {
-               if (cfg->irq == irq)
-                       return cfg;
-
-               cfg_pri = cfg;
-               cfg = cfg->next;
-               count++;
-       }
-
-       if (!irq_cfgx_free) {
-               unsigned long phys;
-               unsigned long total_bytes;
-               /*
-                *  we run out of pre-allocate ones, allocate more
-                */
-               printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
-
-               total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
-               if (after_bootmem)
-                       cfg = kzalloc(total_bytes, GFP_ATOMIC);
-               else
-                       cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
-
-               if (!cfg)
-                       panic("please boot with nr_irq_cfg= %d\n", count * 2);
-
-               phys = __pa(cfg);
-               printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
-
-               for (i = 0; i < nr_irq_cfg; i++)
-                       init_one_irq_cfg(&cfg[i]);
-
-               for (i = 1; i < nr_irq_cfg; i++)
-                       cfg[i-1].next = &cfg[i];
+       struct irq_cfg *cfg = NULL;
+       struct irq_desc *desc;
  
-               irq_cfgx_free = cfg;
-       }
+       desc = irq_to_desc(irq);
+       if (desc)
+               cfg = desc->chip_data;
  
-       cfg = irq_cfgx_free;
-       irq_cfgx_free = irq_cfgx_free->next;
-       cfg->next = NULL;
-       if (cfg_pri)
-               cfg_pri->next = cfg;
-       else
-               irq_cfgx = cfg;
-       cfg->irq = irq;
-       printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
-#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
-       {
-               /* dump the results */
-               struct irq_cfg *cfg;
-               unsigned long phys;
-               unsigned long bytes = sizeof(struct irq_cfg);
-
-               printk(KERN_DEBUG "=========================== %d\n", irq);
-               printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
-               for_each_irq_cfg(cfg) {
-                       phys = __pa(cfg);
-                       printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
-               }
-               printk(KERN_DEBUG "===========================\n");
-       }
-#endif
         return cfg;
  }
  
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-struct irq_pin_list {
-       int apic, pin;
-       struct irq_pin_list *next;
-};
-
-static struct irq_pin_list *irq_2_pin_head;
-/* fill one page ? */
-static int nr_irq_2_pin = 0x100;
-static struct irq_pin_list *irq_2_pin_ptr;
-static void __init irq_2_pin_init_work(void *data)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
  {
-       struct dyn_array *da = data;
-       struct irq_pin_list *pin;
-       int i;
+       struct irq_cfg *cfg;
+       int node;
  
-       pin = *da->name;
+       node = cpu_to_node(cpu);
  
-       for (i = 1; i < *da->nr; i++)
-               pin[i-1].next = &pin[i];
+       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
  
-       irq_2_pin_ptr = &pin[0];
+       return cfg;
  }
-DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
  
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
  {
-       struct irq_pin_list *pin;
-       int i;
-
-       pin = irq_2_pin_ptr;
+       struct irq_cfg *cfg;
  
-       if (pin) {
-               irq_2_pin_ptr = pin->next;
-               pin->next = NULL;
-               return pin;
+       cfg = desc->chip_data;
+       if (!cfg) {
+               desc->chip_data = get_one_free_irq_cfg(cpu);
+               if (!desc->chip_data) {
+                       printk(KERN_ERR "can not alloc irq_cfg\n");
+                       BUG_ON(1);
+               }
         }
+}
  
-       /*
-        *  we run out of pre-allocate ones, allocate more
-        */
-       printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
-
-       if (after_bootmem)
-               pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
-                                GFP_ATOMIC);
-       else
-               pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
-                               nr_irq_2_pin, PAGE_SIZE, 0);
-
-       if (!pin)
-               panic("can not get more irq_2_pin\n");
-
-       for (i = 1; i < nr_irq_2_pin; i++)
-               pin[i-1].next = &pin[i];
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{      /* !CONFIG_SPARSE_IRQ variant: irq_cfgx is a flat array, so the lookup is plain indexing. */
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;   /* NOTE(review): bound is nr_irqs while the array is sized NR_IRQS - assumes nr_irqs <= NR_IRQS, confirm */
+}
  
-       irq_2_pin_ptr = pin->next;
-       pin->next = NULL;
+#endif
  
-       return pin;
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
  }
  
  struct io_apic {
@@ -384,16 +270,16 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
  static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
  {
         struct io_apic __iomem *io_apic = io_apic_base(apic);
-        if (sis_apic_bug)
-                writel(reg, &io_apic->index);
+
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
         writel(value, &io_apic->data);
  }
  
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
  {
         struct irq_pin_list *entry;
         unsigned long flags;
-       struct irq_cfg *cfg = irq_cfg(irq);
  
         spin_lock_irqsave(&ioapic_lock, flags);
         entry = cfg->irq_2_pin;
@@ -475,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin)
  }
  
  #ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
  {
         int apic, pin;
-       struct irq_cfg *cfg;
         struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
  
-       cfg = irq_cfg(irq);
         entry = cfg->irq_2_pin;
         for (;;) {
                 unsigned int reg;
@@ -511,37 +396,49 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
         }
  }
  
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
  
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                        const struct cpumask *mask)
  {
         struct irq_cfg *cfg;
         unsigned long flags;
         unsigned int dest;
         cpumask_t tmp;
-       struct irq_desc *desc;
+       unsigned int irq;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
  
-       cfg = irq_cfg(irq);
-       if (assign_irq_vector(irq, mask))
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
                 return;
  
-       cpus_and(tmp, cfg->domain, mask);
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
         /*
          * Only the high 8 bits are valid.
          */
         dest = SET_APIC_LOGICAL_ID(dest);
  
-       desc = irq_to_desc(irq);
         spin_lock_irqsave(&ioapic_lock, flags);
-       __target_IO_APIC_irq(irq, dest, cfg->vector);
-       desc->affinity = mask;
+       __target_IO_APIC_irq(irq, dest, cfg);
+       cpumask_copy(&desc->affinity, mask);
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
+
+static void set_ioapic_affinity_irq(unsigned int irq,
+                                   const struct cpumask *mask)
+{      /* IRQ-number front end: look up the descriptor and defer to set_ioapic_affinity_irq_desc(). */
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);        /* NOTE(review): not NULL-checked, and the _desc variant reads desc->irq - confirm irq_to_desc() cannot fail for callers of this hook */
+
+       set_ioapic_affinity_irq_desc(desc, mask);
+}
  #endif /* CONFIG_SMP */
  
  /*
@@ -549,20 +446,21 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
   * shared ISA-space IRQs, so we have to support them. We are super
   * fast in the common case, and fast for shared ISA-space IRQs.
   */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
  {
-       struct irq_cfg *cfg;
         struct irq_pin_list *entry;
  
-       /* first time to refer irq_cfg, so with new */
-       cfg = irq_cfg_alloc(irq);
         entry = cfg->irq_2_pin;
         if (!entry) {
-               entry = get_one_free_irq_2_pin();
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                                       apic, pin);
+                       return;
+               }
                 cfg->irq_2_pin = entry;
                 entry->apic = apic;
                 entry->pin = pin;
-               printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
                 return;
         }
  
@@ -574,21 +472,25 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
                 entry = entry->next;
         }
  
-       entry->next = get_one_free_irq_2_pin();
+       entry->next = get_one_free_irq_2_pin(cpu);
         entry = entry->next;
+       /* the allocation can fail; bail out the same way the first-pin path does */
+       if (!entry) {
+               printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                               apic, pin);
+               return;
+       }
         entry->apic = apic;
         entry->pin = pin;
-       printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
  }
  
  /*
   * Reroute an IRQ to a different pin.
   */
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
                                       int oldapic, int oldpin,
                                       int newapic, int newpin)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
         struct irq_pin_list *entry = cfg->irq_2_pin;
         int replaced = 0;
  
@@ -605,87 +501,103 @@ static void __init replace_pin_at_irq(unsigned int irq,
  
         /* why? call replace before add? */
         if (!replaced)
-               add_pin_to_irq(irq, newapic, newpin);
-}
-
-#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)           \
-                                                                       \
-{                                                                      \
-       int pin;                                                        \
-       struct irq_cfg *cfg;                                            \
-       struct irq_pin_list *entry;                                     \
-                                                                       \
-       cfg = irq_cfg(irq);                                             \
-       entry = cfg->irq_2_pin;                                         \
-       for (;;) {                                                      \
-               unsigned int reg;                                       \
-               if (!entry)                                             \
-                       break;                                          \
-               pin = entry->pin;                                       \
-               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-               reg ACTION_DISABLE;                                     \
-               reg ACTION_ENABLE;                                      \
-               io_apic_modify(entry->apic, 0x10 + R + pin*2, reg);     \
-               FINAL;                                                  \
-               if (!entry->next)                                       \
-                       break;                                          \
-               entry = entry->next;                                    \
-       }                                                               \
-}
-
-#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL)                \
-                                                                       \
-       static void name##_IO_APIC_irq (unsigned int irq)               \
-       __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL)
-
-/* mask = 0 */
-DO_ACTION(__unmask,    0, |= 0, &= ~IO_APIC_REDIR_MASKED, )
+               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+}
  
-#ifdef CONFIG_X86_64
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
+                               int mask_and, int mask_or,
+                               void (*final)(struct irq_pin_list *entry))
  {
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       readl(&io_apic->data);
-}
+       int pin;
+       struct irq_pin_list *entry;
  
-/* mask = 1 */
-DO_ACTION(__mask,      0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic))
+       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+               unsigned int reg;
+               pin = entry->pin;
+               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+               reg &= mask_and;
+               reg |= mask_or;
+               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+               if (final)
+                       final(entry);
+       }
+}
  
-#else
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
+{      /* Unmask: clear IO_APIC_REDIR_MASKED for every pin on cfg->irq_2_pin. */
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);        /* and-mask drops the bit, or-mask sets nothing, no per-pin callback */
+}
  
-/* mask = 1 */
-DO_ACTION(__mask,      0, |= IO_APIC_REDIR_MASKED, &= ~0, )
+#ifdef CONFIG_X86_64
+static void io_apic_sync(struct irq_pin_list *entry)
+{
+       /*
+        * Synchronize the IO-APIC and the CPU by doing a dummy read
+        * from the data register of the IO-APIC that owns this pin.
+        */
+       struct io_apic __iomem *io_apic;
+       io_apic = io_apic_base(entry->apic);
+       readl(&io_apic->data);
+}
  
-/* mask = 1, trigger = 0 */
-DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, )
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+{      /* Mask (64-bit build): set IO_APIC_REDIR_MASKED for every pin on cfg->irq_2_pin. */
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);       /* ~0 keeps all other bits; io_apic_sync() runs after each pin is written */
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+{      /* Mask (32-bit build): set IO_APIC_REDIR_MASKED for every pin on cfg->irq_2_pin. */
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);        /* ~0 keeps all other bits; no per-pin callback in this variant */
+}
  
-/* mask = 0, trigger = 1 */
-DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
+{      /* For every pin on cfg->irq_2_pin: clear the level-trigger bit and set the mask bit. */
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,   /* and-mask: drop IO_APIC_REDIR_LEVEL_TRIGGER */
+                       IO_APIC_REDIR_MASKED, NULL);    /* or-mask: set IO_APIC_REDIR_MASKED; no per-pin callback */
+}
  
-#endif
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
+{      /* For every pin on cfg->irq_2_pin: clear the mask bit and set the level-trigger bit. */
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,  /* and-mask: drop IO_APIC_REDIR_MASKED */
+                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);     /* or-mask: set IO_APIC_REDIR_LEVEL_TRIGGER; no per-pin callback */
+}
+#endif /* CONFIG_X86_32 */
  
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
         unsigned long flags;
  
+       BUG_ON(!cfg);
+
         spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(irq);
+       __mask_IO_APIC_irq(cfg);
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
         unsigned long flags;
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(irq);
+       __unmask_IO_APIC_irq(cfg);
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
+static void mask_IO_APIC_irq(unsigned int irq)
+{      /* IRQ-number front end for mask_IO_APIC_irq_desc(). */
+       struct irq_desc *desc = irq_to_desc(irq);       /* NOTE(review): passed on unchecked; the _desc variant reads desc->chip_data - confirm it cannot be NULL here */
+
+       mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{      /* IRQ-number front end for unmask_IO_APIC_irq_desc(). */
+       struct irq_desc *desc = irq_to_desc(irq);       /* NOTE(review): passed on unchecked; the _desc variant reads desc->chip_data - confirm it cannot be NULL here */
+
+       unmask_IO_APIC_irq_desc(desc);
+}
+
  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  {
         struct IO_APIC_route_entry entry;
@@ -795,7 +707,7 @@ int save_mask_IO_APIC_setup(void)
                         kzalloc(sizeof(struct IO_APIC_route_entry) *
                                 nr_ioapic_registers[apic], GFP_KERNEL);
                 if (!early_ioapic_entries[apic])
-                       return -ENOMEM;
+                       goto nomem;
         }
  
         for (apic = 0; apic < nr_ioapics; apic++)
@@ -809,17 +721,31 @@ int save_mask_IO_APIC_setup(void)
                                 ioapic_write_entry(apic, pin, entry);
                         }
                 }
+
         return 0;
+
+nomem:
+       while (apic >= 0)
+               kfree(early_ioapic_entries[apic--]);
+       /* memset() takes a size in bytes, not a number of array elements */
+       memset(early_ioapic_entries, 0, sizeof(early_ioapic_entries));
+
+       return -ENOMEM;
  }
  
  void restore_IO_APIC_setup(void)
  {
         int apic, pin;
  
-       for (apic = 0; apic < nr_ioapics; apic++)
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               if (!early_ioapic_entries[apic])
+                       break;
                 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
                         ioapic_write_entry(apic, pin,
                                            early_ioapic_entries[apic][pin]);
+               kfree(early_ioapic_entries[apic]);
+               early_ioapic_entries[apic] = NULL;
+       }
  }
  
  void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -948,7 +874,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
   */
  static int EISA_ELCR(unsigned int irq)
  {
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                 unsigned int port = 0x4d0 + (irq >> 3);
                 return (inb(port) >> (irq & 7)) & 1;
         }
@@ -1131,11 +1057,11 @@ static int pin_2_irq(int idx, int apic, int pin)
                 while (i < apic)
                         irq += nr_ioapic_registers[i++];
                 irq += pin;
-                /*
+               /*
                   * For MPS mode, so far only needed by ES7000 platform
                   */
-                if (ioapic_renumber_irq)
-                        irq = ioapic_renumber_irq(apic, irq);
+               if (ioapic_renumber_irq)
+                       irq = ioapic_renumber_irq(apic, irq);
         }
  
  #ifdef CONFIG_X86_32
@@ -1173,7 +1099,7 @@ void unlock_vector_lock(void)
         spin_unlock(&vector_lock);
  }
  
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
         /*
          * NOTE! The local APIC isn't very good at handling
@@ -1189,16 +1115,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
         static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
         unsigned int old_vector;
         int cpu;
-       struct irq_cfg *cfg;
  
-       cfg = irq_cfg(irq);
+       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+               return -EBUSY;
  
         /* Only try and allocate irqs on cpus that are present */
         cpus_and(mask, mask, cpu_online_map);
  
-       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-               return -EBUSY;
-
         old_vector = cfg->vector;
         if (old_vector) {
                 cpumask_t tmp;
@@ -1252,24 +1175,22 @@ next:
         return -ENOSPC;
  }
  
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
         int err;
         unsigned long flags;
  
         spin_lock_irqsave(&vector_lock, flags);
-       err = __assign_irq_vector(irq, mask);
+       err = __assign_irq_vector(irq, cfg, mask);
         spin_unlock_irqrestore(&vector_lock, flags);
         return err;
  }
  
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
  {
-       struct irq_cfg *cfg;
         cpumask_t mask;
         int cpu, vector;
  
-       cfg = irq_cfg(irq);
         BUG_ON(!cfg->vector);
  
         vector = cfg->vector;
@@ -1279,6 +1200,20 @@ static void __clear_irq_vector(int irq)
  
         cfg->vector = 0;
         cpus_clear(cfg->domain);
+
+       if (likely(!cfg->move_in_progress))
+               return;
+       cpus_and(mask, cfg->old_domain, cpu_online_map);
+       for_each_cpu_mask_nr(cpu, mask) {
+               for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+                                                               vector++) {
+                       if (per_cpu(vector_irq, cpu)[vector] != irq)
+                               continue;
+                       per_cpu(vector_irq, cpu)[vector] = -1;
+                       break;
+               }
+       }
+       cfg->move_in_progress = 0;
  }
  
  void __setup_vector_irq(int cpu)
@@ -1287,13 +1222,16 @@ void __setup_vector_irq(int cpu)
         /* This function must be called with vector_lock held */
         int irq, vector;
         struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
         /* Mark the inuse vectors */
-       for_each_irq_cfg(cfg) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
                 if (!cpu_isset(cpu, cfg->domain))
                         continue;
                 vector = cfg->vector;
-               irq = cfg->irq;
                 per_cpu(vector_irq, cpu)[vector] = irq;
         }
         /* Mark the free vectors */
@@ -1320,19 +1258,19 @@ static struct irq_chip ir_ioapic_chip;
  #ifdef CONFIG_X86_32
  static inline int IO_APIC_irq_trigger(int irq)
  {
-        int apic, idx, pin;
+       int apic, idx, pin;
  
-        for (apic = 0; apic < nr_ioapics; apic++) {
-                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-                        idx = find_irq_entry(apic, pin, mp_INT);
-                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-                                return irq_trigger(idx);
-                }
-        }
-        /*
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
           * nonexistent IRQs are edge default
           */
-        return 0;
+       return 0;
  }
  #else
  static inline int IO_APIC_irq_trigger(int irq)
@@ -1341,15 +1279,8 @@ static inline int IO_APIC_irq_trigger(int irq)
  }
  #endif
  
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
  {
-       struct irq_desc *desc;
-
-       /* first time to use this irq_desc */
-       if (irq < 16)
-               desc = irq_to_desc(irq);
-       else
-               desc = irq_to_desc_alloc(irq);
  
         if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
             trigger == IOAPIC_LEVEL)
@@ -1441,7 +1372,7 @@ static int setup_ioapic_entry(int apic, int irq,
         return 0;
  }
  
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
                               int trigger, int polarity)
  {
         struct irq_cfg *cfg;
@@ -1451,10 +1382,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
         if (!IO_APIC_IRQ(irq))
                 return;
  
-       cfg = irq_cfg(irq);
+       cfg = desc->chip_data;
  
         mask = TARGET_CPUS;
-       if (assign_irq_vector(irq, mask))
+       if (assign_irq_vector(irq, cfg, mask))
                 return;
  
         cpus_and(mask, cfg->domain, mask);
@@ -1471,12 +1402,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                                cfg->vector)) {
                 printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
                        mp_ioapics[apic].mp_apicid, pin);
-               __clear_irq_vector(irq);
+               __clear_irq_vector(irq, cfg);
                 return;
         }
  
-       ioapic_register_intr(irq, trigger);
-       if (irq < 16)
+       ioapic_register_intr(irq, desc, trigger);
+       if (irq < NR_IRQS_LEGACY)
                 disable_8259A_irq(irq);
  
         ioapic_write_entry(apic, pin, entry);
@@ -1484,41 +1415,58 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
  
  static void __init setup_IO_APIC_irqs(void)
  {
-       int apic, pin, idx, irq, first_notcon = 1;
+       int apic, pin, idx, irq;
+       int notcon = 0;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
  
         apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  
         for (apic = 0; apic < nr_ioapics; apic++) {
-       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-               idx = find_irq_entry(apic,pin,mp_INT);
-               if (idx == -1) {
-                       if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                               first_notcon = 0;
-                       } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                       continue;
-               }
-               if (!first_notcon) {
-                       apic_printk(APIC_VERBOSE, " not connected.\n");
-                       first_notcon = 1;
-               }
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
  
-               irq = pin_2_irq(idx, apic, pin);
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if (idx == -1) {
+                               if (!notcon) {
+                                       notcon = 1;
+                                       apic_printk(APIC_VERBOSE,
+                                               KERN_DEBUG " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               } else
+                                       apic_printk(APIC_VERBOSE, " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               continue;
+                       }
+                       if (notcon) {
+                               apic_printk(APIC_VERBOSE,
+                                       " (apicid-pin) not connected\n");
+                               notcon = 0;
+                       }
+
+                       irq = pin_2_irq(idx, apic, pin);
  #ifdef CONFIG_X86_32
-                if (multi_timer_check(apic, irq))
-                        continue;
+                       if (multi_timer_check(apic, irq))
+                               continue;
  #endif
-               add_pin_to_irq(irq, apic, pin);
+                       desc = irq_to_desc_alloc_cpu(irq, cpu);
+                       if (!desc) {
+                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                               continue;
+                       }
+                       cfg = desc->chip_data;
+                       add_pin_to_irq_cpu(cfg, cpu, apic, pin);
  
-               setup_IO_APIC_irq(apic, pin, irq,
-                                 irq_trigger(idx), irq_polarity(idx));
-       }
+                       setup_IO_APIC_irq(apic, pin, irq, desc,
+                                       irq_trigger(idx), irq_polarity(idx));
+               }
         }
  
-       if (!first_notcon)
-               apic_printk(APIC_VERBOSE, " not connected.\n");
+       if (notcon)
+               apic_printk(APIC_VERBOSE,
+                       " (apicid-pin) not connected\n");
  }
  
  /*
@@ -1570,6 +1518,8 @@ __apicdebuginit(void) print_IO_APIC(void)
         union IO_APIC_reg_03 reg_03;
         unsigned long flags;
         struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       unsigned int irq;
  
         if (apic_verbosity == APIC_QUIET)
                 return;
@@ -1592,8 +1542,8 @@ __apicdebuginit(void) print_IO_APIC(void)
         reg_01.raw = io_apic_read(apic, 1);
         if (reg_01.bits.version >= 0x10)
                 reg_02.raw = io_apic_read(apic, 2);
-        if (reg_01.bits.version >= 0x20)
-                reg_03.raw = io_apic_read(apic, 3);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
         spin_unlock_irqrestore(&ioapic_lock, flags);
  
         printk("\n");
@@ -1658,11 +1608,16 @@ __apicdebuginit(void) print_IO_APIC(void)
         }
         }
         printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for_each_irq_cfg(cfg) {
-               struct irq_pin_list *entry = cfg->irq_2_pin;
+       for_each_irq_desc(irq, desc) {
+               struct irq_pin_list *entry;
+
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
+               entry = cfg->irq_2_pin;
                 if (!entry)
                         continue;
-               printk(KERN_DEBUG "IRQ%d ", cfg->irq);
+               printk(KERN_DEBUG "IRQ%d ", irq);
                 for (;;) {
                         printk("-> %d:%d", entry->apic, entry->pin);
                         if (!entry->next)
@@ -1719,21 +1674,30 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
         printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
  
         if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-               v = apic_read(APIC_ARBPRI);
-               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-                       v & APIC_ARBPRI_MASK);
+               if (!APIC_XAPIC(ver)) {
+                       v = apic_read(APIC_ARBPRI);
+                       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                              v & APIC_ARBPRI_MASK);
+               }
                 v = apic_read(APIC_PROCPRI);
                 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
         }
  
-       v = apic_read(APIC_EOI);
-       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-       v = apic_read(APIC_RRR);
-       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       /*
+        * Remote read supported only in the 82489DX and local APIC for
+        * Pentium processors.
+        */
+       if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+               v = apic_read(APIC_RRR);
+               printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       }
+
         v = apic_read(APIC_LDR);
         printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-       v = apic_read(APIC_DFR);
-       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       if (!x2apic_enabled()) {
+               v = apic_read(APIC_DFR);
+               printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       }
         v = apic_read(APIC_SPIV);
         printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
  
@@ -1753,8 +1717,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
         }
  
         icr = apic_icr_read();
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
  
         v = apic_read(APIC_LVTT);
         printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1784,7 +1748,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
  
  __apicdebuginit(void) print_all_local_APICs(void)
  {
-       on_each_cpu(print_local_APIC, NULL, 1);
+       int cpu;
+
+       preempt_disable();
+       for_each_online_cpu(cpu)
+               smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+       preempt_enable();
  }
  
  __apicdebuginit(void) print_PIC(void)
@@ -2040,6 +2009,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
  
                 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
                 spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
  
                 /*
                  * Sanity check
@@ -2127,14 +2098,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
  {
         int was_pending = 0;
         unsigned long flags;
+       struct irq_cfg *cfg;
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                 disable_8259A_irq(irq);
                 if (i8259A_irq_pending(irq))
                         was_pending = 1;
         }
-       __unmask_IO_APIC_irq(irq);
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
         spin_unlock_irqrestore(&ioapic_lock, flags);
  
         return was_pending;
@@ -2156,9 +2129,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
  #else
  static int ioapic_retrigger_irq(unsigned int irq)
  {
-        send_IPI_self(irq_cfg(irq)->vector);
+       send_IPI_self(irq_cfg(irq)->vector);
  
-        return 1;
+       return 1;
  }
  #endif
  
@@ -2197,35 +2170,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
   * as simple as edge triggered migration and we can do the irq migration
   * with a simple atomic update to IO-APIC RTE.
   */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
         struct irq_cfg *cfg;
-       struct irq_desc *desc;
         cpumask_t tmp, cleanup_mask;
         struct irte irte;
         int modify_ioapic_rte;
         unsigned int dest;
         unsigned long flags;
+       unsigned int irq;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
                 return;
  
+       irq = desc->irq;
         if (get_irte(irq, &irte))
                 return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                 return;
  
-       cfg = irq_cfg(irq);
+       set_extra_move_desc(desc, mask);
+
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
-       desc = irq_to_desc(irq);
         modify_ioapic_rte = desc->status & IRQ_LEVEL;
         if (modify_ioapic_rte) {
                 spin_lock_irqsave(&ioapic_lock, flags);
-               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               __target_IO_APIC_irq(irq, dest, cfg);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
         }
  
@@ -2247,16 +2222,16 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
         desc->affinity = mask;
  }
  
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
  {
         int ret = -1;
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
  
-       mask_IO_APIC_irq(irq);
+       mask_IO_APIC_irq_desc(desc);
  
-       if (io_apic_level_ack_pending(irq)) {
+       if (io_apic_level_ack_pending(cfg)) {
                 /*
-                * Interrupt in progress. Migrating irq now will change the
+                * Interrupt in progress. Migrating irq now will change the
                  * vector information in the IO-APIC RTE and that will confuse
                  * the EOI broadcast performed by cpu.
                  * So, delay the irq migration to the next instance.
@@ -2266,14 +2241,15 @@ static int migrate_irq_remapped_level(int irq)
         }
  
         /* everthing is clear. we have right of way */
-       migrate_ioapic_irq(irq, desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, desc->pending_mask);
  
         ret = 0;
         desc->status &= ~IRQ_MOVE_PENDING;
         cpus_clear(desc->pending_mask);
  
  unmask:
-       unmask_IO_APIC_irq(irq);
+       unmask_IO_APIC_irq_desc(desc);
+
         return ret;
  }
  
@@ -2283,6 +2259,9 @@ static void ir_irq_migration(struct work_struct *work)
         struct irq_desc *desc;
  
         for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+
                 if (desc->status & IRQ_MOVE_PENDING) {
                         unsigned long flags;
  
@@ -2294,7 +2273,7 @@ static void ir_irq_migration(struct work_struct *work)
                                 continue;
                         }
  
-                       desc->chip->set_affinity(irq, desc->pending_mask);
+                       desc->chip->set_affinity(irq, &desc->pending_mask);
                         spin_unlock_irqrestore(&desc->lock, flags);
                 }
         }
@@ -2303,18 +2282,24 @@ static void ir_irq_migration(struct work_struct *work)
  /*
   * Migrates the IRQ destination in the process context.
   */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                           const struct cpumask *mask)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
-
         if (desc->status & IRQ_LEVEL) {
                 desc->status |= IRQ_MOVE_PENDING;
-               desc->pending_mask = mask;
-               migrate_irq_remapped_level(irq);
+               cpumask_copy(&desc->pending_mask, mask);
+               migrate_irq_remapped_level_desc(desc);
                 return;
         }
  
-       migrate_ioapic_irq(irq, mask);
+       migrate_ioapic_irq_desc(desc, *mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+                                      const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       set_ir_ioapic_affinity_irq_desc(desc, mask);
  }
  #endif
  
@@ -2334,6 +2319,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                 struct irq_cfg *cfg;
                 irq = __get_cpu_var(vector_irq)[vector];
  
+               if (irq == -1)
+                       continue;
+
                 desc = irq_to_desc(irq);
                 if (!desc)
                         continue;
@@ -2355,9 +2343,10 @@ unlock:
         irq_exit();
  }
  
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_desc *desc = *descp;
+       struct irq_cfg *cfg = desc->chip_data;
         unsigned vector, me;
  
         if (likely(!cfg->move_in_progress))
@@ -2375,8 +2364,9 @@ static void irq_complete_move(unsigned int irq)
         }
  }
  #else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
  #endif
+
  #ifdef CONFIG_INTR_REMAP
  static void ack_x2apic_level(unsigned int irq)
  {
@@ -2387,33 +2377,37 @@ static void ack_x2apic_edge(unsigned int irq)
  {
         ack_x2APIC_irq();
  }
+
  #endif
  
  static void ack_apic_edge(unsigned int irq)
  {
-       irq_complete_move(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       irq_complete_move(&desc);
         move_native_irq(irq);
         ack_APIC_irq();
  }
  
-#ifdef CONFIG_X86_32
  atomic_t irq_mis_count;
-#endif
  
  static void ack_apic_level(unsigned int irq)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
+
  #ifdef CONFIG_X86_32
         unsigned long v;
         int i;
  #endif
+       struct irq_cfg *cfg;
         int do_unmask_irq = 0;
  
-       irq_complete_move(irq);
+       irq_complete_move(&desc);
  #ifdef CONFIG_GENERIC_PENDING_IRQ
         /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
                 do_unmask_irq = 1;
-               mask_IO_APIC_irq(irq);
+               mask_IO_APIC_irq_desc(desc);
         }
  #endif
  
@@ -2437,7 +2431,8 @@ static void ack_apic_level(unsigned int irq)
         * operation to prevent an edge-triggered interrupt escaping meanwhile.
         * The idea is from Manfred Spraul.  --macro
         */
-       i = irq_cfg(irq)->vector;
+       cfg = desc->chip_data;
+       i = cfg->vector;
  
         v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
  #endif
@@ -2476,45 +2471,46 @@ static void ack_apic_level(unsigned int irq)
                  * accurate and is causing problems then it is a hardware bug
                  * and you can go talk to the chipset vendor about it.
                  */
-               if (!io_apic_level_ack_pending(irq))
+               cfg = desc->chip_data;
+               if (!io_apic_level_ack_pending(cfg))
                         move_masked_irq(irq);
-               unmask_IO_APIC_irq(irq);
+               unmask_IO_APIC_irq_desc(desc);
         }
  
  #ifdef CONFIG_X86_32
         if (!(v & (1 << (i & 0x1f)))) {
                 atomic_inc(&irq_mis_count);
                 spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-               __unmask_and_level_IO_APIC_irq(irq);
+               __mask_and_edge_IO_APIC_irq(cfg);
+               __unmask_and_level_IO_APIC_irq(cfg);
                 spin_unlock(&ioapic_lock);
         }
  #endif
  }
  
  static struct irq_chip ioapic_chip __read_mostly = {
-       .name           = "IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_apic_edge,
-       .eoi            = ack_apic_level,
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_apic_edge,
+       .eoi            = ack_apic_level,
  #ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity_irq,
+       .set_affinity   = set_ioapic_affinity_irq,
  #endif
         .retrigger      = ioapic_retrigger_irq,
  };
  
  #ifdef CONFIG_INTR_REMAP
  static struct irq_chip ir_ioapic_chip __read_mostly = {
-       .name           = "IR-IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_x2apic_edge,
-       .eoi            = ack_x2apic_level,
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
  #ifdef CONFIG_SMP
-       .set_affinity   = set_ir_ioapic_affinity_irq,
+       .set_affinity   = set_ir_ioapic_affinity_irq,
  #endif
         .retrigger      = ioapic_retrigger_irq,
  };
@@ -2537,21 +2533,22 @@ static inline void init_IO_APIC_traps(void)
          * Also, we've got to be careful not to trash gate
          * 0x80, because int 0x80 is hm, kind of importantish. ;)
          */
-       for_each_irq_cfg(cfg) {
-               irq = cfg->irq;
-               if (IO_APIC_IRQ(irq) && !cfg->vector) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+
+               cfg = desc->chip_data;
+               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
                         /*
                          * Hmm.. We don't have an entry for this,
                          * so default to an old-fashioned 8259
                          * interrupt if we can..
                          */
-                       if (irq < 16)
+                       if (irq < NR_IRQS_LEGACY)
                                 make_8259A_irq(irq);
-                       else {
-                               desc = irq_to_desc(irq);
+                       else
                                 /* Strange. Oh, well.. */
                                 desc->chip = &no_irq_chip;
-                       }
                 }
         }
  }
@@ -2576,7 +2573,7 @@ static void unmask_lapic_irq(unsigned int irq)
         apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
  }
  
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
  {
         ack_APIC_irq();
  }
@@ -2588,11 +2585,8 @@ static struct irq_chip lapic_chip __read_mostly = {
         .ack            = ack_lapic_irq,
  };
  
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
  {
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
         desc->status &= ~IRQ_LEVEL;
         set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                       "edge");
@@ -2696,7 +2690,9 @@ int timer_through_8259 __initdata;
   */
  static inline void __init check_timer(void)
  {
-       struct irq_cfg *cfg = irq_cfg(0);
+       struct irq_desc *desc = irq_to_desc(0);
+       struct irq_cfg *cfg = desc->chip_data;
+       int cpu = boot_cpu_id;
         int apic1, pin1, apic2, pin2;
         unsigned long flags;
         unsigned int ver;
@@ -2704,14 +2700,14 @@ static inline void __init check_timer(void)
  
         local_irq_save(flags);
  
-        ver = apic_read(APIC_LVR);
-        ver = GET_APIC_VERSION(ver);
+       ver = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(ver);
  
         /*
          * get/set the timer IRQ vector:
          */
         disable_8259A_irq(0);
-       assign_irq_vector(0, TARGET_CPUS);
+       assign_irq_vector(0, cfg, TARGET_CPUS);
  
         /*
          * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2762,10 +2758,10 @@ static inline void __init check_timer(void)
                  * Ok, does IRQ0 through the IOAPIC work?
                  */
                 if (no_pin1) {
-                       add_pin_to_irq(0, apic1, pin1);
+                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
                         setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                 }
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                 if (timer_irq_works()) {
                         if (nmi_watchdog == NMI_IO_APIC) {
                                 setup_nmi();
@@ -2791,9 +2787,9 @@ static inline void __init check_timer(void)
                 /*
                  * legacy devices should be connected to IO APIC #0
                  */
-               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
                 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                 enable_8259A_irq(0);
                 if (timer_irq_works()) {
                         apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2825,7 +2821,7 @@ static inline void __init check_timer(void)
         apic_printk(APIC_QUIET, KERN_INFO
                     "...trying to set up timer as Virtual Wire IRQ...\n");
  
-       lapic_register_intr(0);
+       lapic_register_intr(0, desc);
         apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
         enable_8259A_irq(0);
  
@@ -2890,12 +2886,12 @@ void __init setup_IO_APIC(void)
         io_apic_irqs = ~PIC_IRQS;
  
         apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-        /*
+       /*
           * Set up IO-APIC IRQ routing.
           */
  #ifdef CONFIG_X86_32
-        if (!acpi_ioapic)
-                setup_ioapic_ids_from_mpc();
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
  #endif
         sync_Arb_IDs();
         setup_IO_APIC_irqs();
@@ -2910,9 +2906,9 @@ void __init setup_IO_APIC(void)
  
  static int __init io_apic_bug_finalize(void)
  {
-        if (sis_apic_bug == -1)
-                sis_apic_bug = 0;
-        return 0;
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       return 0;
  }
  
  late_initcall(io_apic_bug_finalize);
@@ -3010,24 +3006,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
         unsigned int irq;
         unsigned int new;
         unsigned long flags;
-       struct irq_cfg *cfg_new;
-
-#ifndef CONFIG_HAVE_SPARSE_IRQ
-       irq_want = nr_irqs - 1;
-#endif
+       struct irq_cfg *cfg_new = NULL;
+       int cpu = boot_cpu_id;
+       struct irq_desc *desc_new = NULL;
  
         irq = 0;
         spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new > 0; new--) {
+       for (new = irq_want; new < NR_IRQS; new++) {
                 if (platform_legacy_irq(new))
                         continue;
-               cfg_new = irq_cfg(new);
-               if (cfg_new && cfg_new->vector != 0)
+
+               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               if (!desc_new) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", new);
                         continue;
-               /* check if need to create one */
-               if (!cfg_new)
-                       cfg_new = irq_cfg_alloc(new);
-               if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+               }
+               cfg_new = desc_new->chip_data;
+
+               if (cfg_new->vector != 0)
+                       continue;
+               if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
                         irq = new;
                 break;
         }
@@ -3035,15 +3033,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
  
         if (irq > 0) {
                 dynamic_irq_init(irq);
+               /* restore it, in case dynamic_irq_init clears it */
+               if (desc_new)
+                       desc_new->chip_data = cfg_new;
         }
         return irq;
  }
  
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
  int create_irq(void)
  {
+       unsigned int irq_want;
         int irq;
  
-       irq = create_irq_nr(nr_irqs - 1);
+       irq_want = nr_irqs_gsi;
+       irq = create_irq_nr(irq_want);
  
         if (irq == 0)
                 irq = -1;
@@ -3054,14 +3058,22 @@ int create_irq(void)
  void destroy_irq(unsigned int irq)
  {
         unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
+       /* store it, in case dynamic_irq_cleanup clear it */
+       desc = irq_to_desc(irq);
+       cfg = desc->chip_data;
         dynamic_irq_cleanup(irq);
+       /* connect back irq_cfg */
+       if (desc)
+               desc->chip_data = cfg;
  
  #ifdef CONFIG_INTR_REMAP
         free_irte(irq);
  #endif
         spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq);
+       __clear_irq_vector(irq, cfg);
         spin_unlock_irqrestore(&vector_lock, flags);
  }
  
@@ -3076,12 +3088,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
         unsigned dest;
         cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
         tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
         if (err)
                 return err;
  
-       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, tmp);
         dest = cpu_mask_to_apicid(tmp);
  
@@ -3137,62 +3149,63 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
  }
  
  #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
         cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
                 return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
-       read_msi_msg(irq, &msg);
+       read_msi_msg_desc(desc, &msg);
  
         msg.data &= ~MSI_DATA_VECTOR_MASK;
         msg.data |= MSI_DATA_VECTOR(cfg->vector);
         msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
-       write_msi_msg(irq, &msg);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       write_msi_msg_desc(desc, &msg);
+       cpumask_copy(&desc->affinity, mask);
  }
-
  #ifdef CONFIG_INTR_REMAP
  /*
   * Migrate the MSI irq to another cpumask. This migration is
   * done in the process context using interrupt-remapping hardware.
   */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+                                   const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         unsigned int dest;
         cpumask_t tmp, cleanup_mask;
         struct irte irte;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
  
         if (get_irte(irq, &irte))
                 return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
                 return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
         irte.vector = cfg->vector;
@@ -3215,9 +3228,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
                 cfg->move_in_progress = 0;
         }
  
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       cpumask_copy(&desc->affinity, mask);
  }
+
  #endif
  #endif /* CONFIG_SMP */
  
@@ -3269,14 +3282,14 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
         if (index < 0) {
                 printk(KERN_ERR
                        "Unable to allocate %d IRTE for PCI %s\n", nvec,
-                       pci_name(dev));
+                      pci_name(dev));
                 return -ENOSPC;
         }
         return index;
  }
  #endif
  
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
  {
         int ret;
         struct msi_msg msg;
@@ -3285,7 +3298,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
         if (ret < 0)
                 return ret;
  
-       set_irq_msi(irq, desc);
+       set_irq_msi(irq, msidesc);
         write_msi_msg(irq, &msg);
  
  #ifdef CONFIG_INTR_REMAP
@@ -3300,29 +3313,18 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
  #endif
                 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
  
-       return 0;
-}
-
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-       unsigned int irq;
+       dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
  
-       irq = dev->bus->number;
-       irq <<= 8;
-       irq |= dev->devfn;
-       irq <<= 12;
-
-       return irq;
+       return 0;
  }
  
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
  {
         unsigned int irq;
         int ret;
         unsigned int irq_want;
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+       irq_want = nr_irqs_gsi;
         irq = create_irq_nr(irq_want);
         if (irq == 0)
                 return -1;
@@ -3336,7 +3338,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
                 goto error;
  no_ir:
  #endif
-       ret = setup_msi_irq(dev, desc, irq);
+       ret = setup_msi_irq(dev, msidesc, irq);
         if (ret < 0) {
                 destroy_irq(irq);
                 return ret;
@@ -3354,7 +3356,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
  {
         unsigned int irq;
         int ret, sub_handle;
-       struct msi_desc *desc;
+       struct msi_desc *msidesc;
         unsigned int irq_want;
  
  #ifdef CONFIG_INTR_REMAP
@@ -3362,10 +3364,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
         int index = 0;
  #endif
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
         sub_handle = 0;
-       list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want--);
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want);
+               irq_want++;
                 if (irq == 0)
                         return -1;
  #ifdef CONFIG_INTR_REMAP
@@ -3397,7 +3400,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
                 }
  no_ir:
  #endif
-               ret = setup_msi_irq(dev, desc, irq);
+               ret = setup_msi_irq(dev, msidesc, irq);
                 if (ret < 0)
                         goto error;
                 sub_handle++;
@@ -3416,23 +3419,24 @@ void arch_teardown_msi_irq(unsigned int irq)
  
  #ifdef CONFIG_DMAR
  #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
         cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
                 return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
         dmar_msi_read(irq, &msg);
@@ -3443,9 +3447,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
         dmar_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       cpumask_copy(&desc->affinity, mask);
  }
+
  #endif /* CONFIG_SMP */
  
  struct irq_chip dmar_msi_type = {
@@ -3474,6 +3478,70 @@ int arch_setup_dmar_msi(unsigned int irq)
  }
  #endif
  
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg;
+       struct msi_msg msg;
+       unsigned int dest;
+       cpumask_t tmp;
+
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return;
+
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
+               return;
+
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       hpet_msi_read(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       hpet_msi_write(irq, &msg);
+       cpumask_copy(&desc->affinity, mask);
+}
+
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+       .name = "HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+       .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = hpet_msi_set_affinity,
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(NULL, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       hpet_msi_write(irq, &msg);
+       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+               "edge");
+
+       return 0;
+}
+#endif
+
  #endif /* CONFIG_PCI_MSI */
  /*
   * Hypertransport interrupt support
@@ -3496,28 +3564,29 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
         write_ht_irq_msg(irq, &msg);
  }
  
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         unsigned int dest;
         cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, *mask))
                 return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
+       set_extra_move_desc(desc, *mask);
+
+       cpumask_and(&tmp, &cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
         target_ht_irq(irq, dest, cfg->vector);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       cpumask_copy(&desc->affinity, mask);
  }
+
  #endif
  
  static struct irq_chip ht_irq_chip = {
@@ -3537,13 +3606,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
         int err;
         cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
         tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
         if (!err) {
                 struct ht_irq_msg msg;
                 unsigned dest;
  
-               cfg = irq_cfg(irq);
                 cpus_and(tmp, cfg->domain, tmp);
                 dest = cpu_mask_to_apicid(tmp);
  
@@ -3566,11 +3635,103 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
  
                 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
                                               handle_edge_irq, "edge");
+
+               dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
         }
         return err;
  }
  #endif /* CONFIG_HT_IRQ */
  
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+                      unsigned long mmr_offset)
+{
+       const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+       struct irq_cfg *cfg;
+       int mmr_pnode;
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       unsigned long flags;
+       int err;
+
+       cfg = irq_cfg(irq);
+
+       err = assign_irq_vector(irq, cfg, *eligible_cpu);
+       if (err != 0)
+               return err;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+                                     irq_name);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->vector = cfg->vector;
+       entry->delivery_mode = INT_DELIVERY_MODE;
+       entry->dest_mode = INT_DEST_MODE;
+       entry->polarity = 0;
+       entry->trigger = 0;
+       entry->mask = 0;
+       entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+       return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       int mmr_pnode;
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->mask = 1;
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.entries;
+}
+
+void __init probe_nr_irqs_gsi(void)
+{
+       int idx;
+       int nr = 0;
+
+       for (idx = 0; idx < nr_ioapics; idx++)
+               nr += io_apic_get_redir_entries(idx) + 1;
+
+       if (nr > nr_irqs_gsi)
+               nr_irqs_gsi = nr;
+}
+
  /* --------------------------------------------------------------------------
                            ACPI-based IOAPIC Configuration
     -------------------------------------------------------------------------- */
@@ -3665,34 +3826,33 @@ int __init io_apic_get_version(int ioapic)
  }
  #endif
  
-int __init io_apic_get_redir_entries (int ioapic)
-{
-       union IO_APIC_reg_01    reg_01;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       reg_01.raw = io_apic_read(ioapic, 1);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       return reg_01.bits.entries;
-}
-
-
  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
  {
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
+
         if (!IO_APIC_IRQ(irq)) {
                 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
                         ioapic);
                 return -EINVAL;
         }
  
+       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
         /*
          * IRQs < 16 are already in the irq_2_pin[] map
          */
-       if (irq >= 16)
-               add_pin_to_irq(irq, ioapic, pin);
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+       }
  
-       setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
  
         return 0;
  }
@@ -3728,7 +3888,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
  void __init setup_ioapic_dest(void)
  {
         int pin, ioapic, irq, irq_entry;
+       struct irq_desc *desc;
         struct irq_cfg *cfg;
+       cpumask_t mask;
  
         if (skip_ioapic_setup == 1)
                 return;
@@ -3744,17 +3906,31 @@ void __init setup_ioapic_dest(void)
                          * when you have too many devices, because at that time only boot
                          * cpu is online.
                          */
-                       cfg = irq_cfg(irq);
-                       if (!cfg->vector)
-                               setup_IO_APIC_irq(ioapic, pin, irq,
+                       desc = irq_to_desc(irq);
+                       cfg = desc->chip_data;
+                       if (!cfg->vector) {
+                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
                                                   irq_trigger(irq_entry),
                                                   irq_polarity(irq_entry));
+                               continue;
+
+                       }
+
+                       /*
+                        * Honour affinities which have been set in early boot
+                        */
+                       if (desc->status &
+                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                               mask = desc->affinity;
+                       else
+                               mask = TARGET_CPUS;
+
  #ifdef CONFIG_INTR_REMAP
-                       else if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+                       if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq_desc(desc, &mask);
                         else
-                               set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+                               set_ioapic_affinity_irq_desc(desc, &mask);
                 }
  
         }
@@ -3800,23 +3976,23 @@ static struct resource * __init ioapic_setup_resources(void)
  void __init ioapic_init_mappings(void)
  {
         unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-       int i;
         struct resource *ioapic_res;
+       int i;
  
         ioapic_res = ioapic_setup_resources();
         for (i = 0; i < nr_ioapics; i++) {
                 if (smp_found_config) {
                         ioapic_phys = mp_ioapics[i].mp_apicaddr;
  #ifdef CONFIG_X86_32
-                        if (!ioapic_phys) {
-                                printk(KERN_ERR
-                                       "WARNING: bogus zero IO-APIC "
-                                       "address found in MPTABLE, "
-                                       "disabling IO/APIC support!\n");
-                                smp_found_config = 0;
-                                skip_ioapic_setup = 1;
-                                goto fake_ioapic_page;
-                        }
+                       if (!ioapic_phys) {
+                               printk(KERN_ERR
+                                      "WARNING: bogus zero IO-APIC "
+                                      "address found in MPTABLE, "
+                                      "disabling IO/APIC support!\n");
+                               smp_found_config = 0;
+                               skip_ioapic_setup = 1;
+                               goto fake_ioapic_page;
+                       }
  #endif
                 } else {
  #ifdef CONFIG_X86_32