sparc64: Kill __show_regs().
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index ce05deb..c481673 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -1,7 +1,6 @@
-/* $Id: irq.c,v 1.114 2002/01/11 08:45:38 davem Exp $
- * irq.c: UltraSparc IRQ handling/init/registry.
+/* irq.c: UltraSparc IRQ handling/init/registry.
  *
- * Copyright (C) 1997  David S. Miller  (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1998  Eddie C. Dost    (ecd@skynet.be)
  * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
  */
 #include <asm/cpudata.h>
 #include <asm/auxio.h>
 #include <asm/head.h>
+#include <asm/hypervisor.h>
+#include <asm/cacheflush.h>
 
-/* UPA nodes send interrupt packet to UltraSparc with first data reg
- * value low 5 (7 on Starfire) bits holding the IRQ identifier being
- * delivered.  We must translate this into a non-vector IRQ so we can
- * set the softint on this cpu.
- *
- * To make processing these packets efficient and race free we use
- * an array of irq buckets below.  The interrupt vector handler in
- * entry.S feeds incoming packets into per-cpu pil-indexed lists.
- * The IVEC handler does not need to act atomically, the PIL dispatch
- * code uses CAS to get an atomic snapshot of the list and clear it
- * at the same time.
- *
- * If you make changes to ino_bucket, please update hand coded assembler
- * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S
- */
-struct ino_bucket {
-       /* Next handler in per-CPU IRQ worklist.  We know that
-        * bucket pointers have the high 32-bits clear, so to
-        * save space we only store the bits we need.
-        */
-/*0x00*/unsigned int irq_chain;
-
-       /* Virtual interrupt number assigned to this INO.  */
-/*0x04*/unsigned int virt_irq;
-};
+#include "entry.h"
 
 #define NUM_IVECS      (IMAP_INR + 1)
-struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (SMP_CACHE_BYTES)));
-
-#define __irq_ino(irq) \
-        (((struct ino_bucket *)(unsigned long)(irq)) - &ivector_table[0])
-#define __bucket(irq) ((struct ino_bucket *)(unsigned long)(irq))
-#define __irq(bucket) ((unsigned int)(unsigned long)(bucket))
-
-/* This has to be in the main kernel image, it cannot be
- * turned into per-cpu data.  The reason is that the main
- * kernel image is locked into the TLB and this structure
- * is accessed from the vectored interrupt trap handler.  If
- * access to this structure takes a TLB miss it could cause
- * the 5-level sparc v9 trap stack to overflow.
+
+struct ino_bucket *ivector_table;
+unsigned long ivector_table_pa;
+
+/* On several sun4u processors, it is illegal to mix bypass and
+ * non-bypass accesses.  Therefore we access all INO buckets
+ * using bypass accesses only.
  */
-#define irq_work(__cpu)        &(trap_block[(__cpu)].irq_worklist)
+static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
+{
+       unsigned long ret;
+
+       __asm__ __volatile__("ldxa      [%1] %2, %0"
+                            : "=&r" (ret)
+                            : "r" (bucket_pa +
+                                   offsetof(struct ino_bucket,
+                                            __irq_chain_pa)),
+                              "i" (ASI_PHYS_USE_EC));
 
-static unsigned int virt_to_real_irq_table[NR_IRQS];
-static unsigned char virt_irq_cur = 1;
+       return ret;
+}
+
+static void bucket_clear_chain_pa(unsigned long bucket_pa)
+{
+       __asm__ __volatile__("stxa      %%g0, [%0] %1"
+                            : /* no outputs */
+                            : "r" (bucket_pa +
+                                   offsetof(struct ino_bucket,
+                                            __irq_chain_pa)),
+                              "i" (ASI_PHYS_USE_EC));
+}
+
+static unsigned int bucket_get_virt_irq(unsigned long bucket_pa)
+{
+       unsigned int ret;
+
+       __asm__ __volatile__("lduwa     [%1] %2, %0"
+                            : "=&r" (ret)
+                            : "r" (bucket_pa +
+                                   offsetof(struct ino_bucket,
+                                            __virt_irq)),
+                              "i" (ASI_PHYS_USE_EC));
 
-static unsigned char virt_irq_alloc(unsigned int real_irq)
+       return ret;
+}
+
+static void bucket_set_virt_irq(unsigned long bucket_pa,
+                               unsigned int virt_irq)
 {
+       __asm__ __volatile__("stwa      %0, [%1] %2"
+                            : /* no outputs */
+                            : "r" (virt_irq),
+                              "r" (bucket_pa +
+                                   offsetof(struct ino_bucket,
+                                            __virt_irq)),
+                              "i" (ASI_PHYS_USE_EC));
+}
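
These four accessors touch ino_bucket fields only through ASI_PHYS_USE_EC,
a bypass ASI that addresses physical memory directly (which is also why the
buckets get explicit D-cache flushes elsewhere in this patch).  For
reference, they assume an ino_bucket layout along these lines (a sketch;
the real definition moves into entry.h):

	struct ino_bucket {
	/*0x00*/ unsigned long	__irq_chain_pa;	/* PA of next bucket in the
						 * per-cpu IRQ worklist */
	/*0x08*/ unsigned int	__virt_irq;	/* virtual IRQ assigned to
						 * this INO */
	/*0x0c*/ unsigned int	__pad;
	};
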
+
+#define irq_work_pa(__cpu)     &(trap_block[(__cpu)].irq_worklist_pa)
+
+static struct {
+       unsigned int dev_handle;
+       unsigned int dev_ino;
+       unsigned int in_use;
+} virt_irq_table[NR_IRQS];
+static DEFINE_SPINLOCK(virt_irq_alloc_lock);
+
+unsigned char virt_irq_alloc(unsigned int dev_handle,
+                            unsigned int dev_ino)
+{
+       unsigned long flags;
        unsigned char ent;
 
        BUILD_BUG_ON(NR_IRQS >= 256);
 
-       ent = virt_irq_cur;
+       spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
+       for (ent = 1; ent < NR_IRQS; ent++) {
+               if (!virt_irq_table[ent].in_use)
+                       break;
+       }
        if (ent >= NR_IRQS) {
                printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
-               return 0;
+               ent = 0;
+       } else {
+               virt_irq_table[ent].dev_handle = dev_handle;
+               virt_irq_table[ent].dev_ino = dev_ino;
+               virt_irq_table[ent].in_use = 1;
        }
 
-       virt_irq_cur = ent + 1;
-       virt_to_real_irq_table[ent] = real_irq;
+       spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 
        return ent;
 }
 
-#if 0 /* Currently unused. */
-static unsigned char real_to_virt_irq(unsigned int real_irq)
+#ifdef CONFIG_PCI_MSI
+void virt_irq_free(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket = __bucket(real_irq);
+       unsigned long flags;
 
-       return bucket->virt_irq;
-}
-#endif
+       if (virt_irq >= NR_IRQS)
+               return;
 
-static unsigned int virt_to_real_irq(unsigned char virt_irq)
-{
-       return virt_to_real_irq_table[virt_irq];
+       spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
+       virt_irq_table[virt_irq].in_use = 0;
+
+       spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 }
+#endif
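
Allocation and teardown pair up as in the following sketch (hypothetical
caller; only the virt_irq_alloc()/virt_irq_free() signatures come from
this file):

	unsigned int virt_irq = virt_irq_alloc(devhandle, devino);
	if (!virt_irq)
		return -ENOMEM;		/* 0 doubles as the failure value */
	/* ... use virt_irq ... */
	virt_irq_free(virt_irq);	/* CONFIG_PCI_MSI only: clears in_use
					 * so the slot can be reused */
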
 
 /*
  * /proc/interrupts printing:
@@ -163,8 +200,6 @@ skip:
        return 0;
 }
 
-extern unsigned long real_hard_smp_processor_id(void);
-
 static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 {
        unsigned int tid;
@@ -205,21 +240,10 @@ struct irq_handler_data {
        unsigned long   imap;
 
        void            (*pre_handler)(unsigned int, void *, void *);
-       void            *pre_handler_arg1;
-       void            *pre_handler_arg2;
+       void            *arg1;
+       void            *arg2;
 };
 
-static inline struct ino_bucket *virt_irq_to_bucket(unsigned int virt_irq)
-{
-       unsigned int real_irq = virt_to_real_irq(virt_irq);
-       struct ino_bucket *bucket = NULL;
-
-       if (likely(real_irq))
-               bucket = __bucket(real_irq);
-
-       return bucket;
-}
-
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(unsigned int virt_irq)
 {
@@ -268,11 +292,10 @@ static int irq_choose_cpu(unsigned int virt_irq)
 
 static void sun4u_irq_enable(unsigned int virt_irq)
 {
-       irq_desc_t *desc = irq_desc + virt_irq;
-       struct irq_handler_data *data = desc->handler_data;
+       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 
        if (likely(data)) {
-               unsigned long cpuid, imap;
+               unsigned long cpuid, imap, val;
                unsigned int tid;
 
                cpuid = irq_choose_cpu(virt_irq);
@@ -280,165 +303,257 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 
                tid = sun4u_compute_tid(imap, cpuid);
 
-               upa_writel(tid | IMAP_VALID, imap);
+               val = upa_readq(imap);
+               val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+                        IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+               val |= tid | IMAP_VALID;
+               upa_writeq(val, imap);
+               upa_writeq(ICLR_IDLE, data->iclr);
        }
 }
 
+static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+       sun4u_irq_enable(virt_irq);
+}
+
 static void sun4u_irq_disable(unsigned int virt_irq)
 {
-       irq_desc_t *desc = irq_desc + virt_irq;
-       struct irq_handler_data *data = desc->handler_data;
+       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 
        if (likely(data)) {
                unsigned long imap = data->imap;
-               u32 tmp = upa_readl(imap);
+               unsigned long tmp = upa_readq(imap);
 
                tmp &= ~IMAP_VALID;
-               upa_writel(tmp, imap);
+               upa_writeq(tmp, imap);
        }
 }
 
-static void sun4u_irq_end(unsigned int virt_irq)
+static void sun4u_irq_eoi(unsigned int virt_irq)
 {
-       irq_desc_t *desc = irq_desc + virt_irq;
-       struct irq_handler_data *data = desc->handler_data;
+       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+       struct irq_desc *desc = irq_desc + virt_irq;
+
+       if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+               return;
 
        if (likely(data))
-               upa_writel(ICLR_IDLE, data->iclr);
+               upa_writeq(ICLR_IDLE, data->iclr);
 }
 
 static void sun4v_irq_enable(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       unsigned int ino = bucket - &ivector_table[0];
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+       unsigned long cpuid = irq_choose_cpu(virt_irq);
+       int err;
 
-       if (likely(bucket)) {
-               unsigned long cpuid;
-               int err;
+       err = sun4v_intr_settarget(ino, cpuid);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+                      "err(%d)\n", ino, cpuid, err);
+       err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_setstate(%x): "
+                      "err(%d)\n", ino, err);
+       err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
+                      ino, err);
+}
 
-               cpuid = irq_choose_cpu(virt_irq);
+static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+       unsigned long cpuid = irq_choose_cpu(virt_irq);
+       int err;
 
-               err = sun4v_intr_settarget(ino, cpuid);
-               if (err != HV_EOK)
-                       printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
-                              ino, cpuid, err);
-               err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
-               if (err != HV_EOK)
-                       printk("sun4v_intr_setenabled(%x): err(%d)\n",
-                              ino, err);
-       }
+       err = sun4v_intr_settarget(ino, cpuid);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+                      "err(%d)\n", ino, cpuid, err);
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       unsigned int ino = bucket - &ivector_table[0];
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+       int err;
 
-       if (likely(bucket)) {
-               int err;
+       err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_setenabled(%x): "
+                      "err(%d)\n", ino, err);
+}
 
-               err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
-               if (err != HV_EOK)
-                       printk("sun4v_intr_setenabled(%x): "
-                              "err(%d)\n", ino, err);
-       }
+static void sun4v_irq_eoi(unsigned int virt_irq)
+{
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+       struct irq_desc *desc = irq_desc + virt_irq;
+       int err;
+
+       if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+               return;
+
+       err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_intr_setstate(%x): "
+                      "err(%d)\n", ino, err);
 }
 
-static void sun4v_irq_end(unsigned int virt_irq)
+static void sun4v_virq_enable(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       unsigned int ino = bucket - &ivector_table[0];
+       unsigned long cpuid, dev_handle, dev_ino;
+       int err;
 
-       if (likely(bucket)) {
-               int err;
+       cpuid = irq_choose_cpu(virt_irq);
 
-               err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
-               if (err != HV_EOK)
-                       printk("sun4v_intr_setstate(%x): "
-                              "err(%d)\n", ino, err);
-       }
+       dev_handle = virt_irq_table[virt_irq].dev_handle;
+       dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+       err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+                      "err(%d)\n",
+                      dev_handle, dev_ino, cpuid, err);
+       err = sun4v_vintr_set_state(dev_handle, dev_ino,
+                                   HV_INTR_STATE_IDLE);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+                      "HV_INTR_STATE_IDLE): err(%d)\n",
+                      dev_handle, dev_ino, err);
+       err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+                                   HV_INTR_ENABLED);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+                      "HV_INTR_ENABLED): err(%d)\n",
+                      dev_handle, dev_ino, err);
 }
 
-static void run_pre_handler(unsigned int virt_irq)
+static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       irq_desc_t *desc = irq_desc + virt_irq;
-       struct irq_handler_data *data = desc->handler_data;
+       unsigned long cpuid, dev_handle, dev_ino;
+       int err;
 
-       if (likely(data->pre_handler)) {
-               data->pre_handler(__irq_ino(__irq(bucket)),
-                                 data->pre_handler_arg1,
-                                 data->pre_handler_arg2);
-       }
+       cpuid = irq_choose_cpu(virt_irq);
+
+       dev_handle = virt_irq_table[virt_irq].dev_handle;
+       dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+       err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+                      "err(%d)\n",
+                      dev_handle, dev_ino, cpuid, err);
 }
 
-static struct hw_interrupt_type sun4u_irq = {
-       .typename       = "sun4u",
-       .enable         = sun4u_irq_enable,
-       .disable        = sun4u_irq_disable,
-       .end            = sun4u_irq_end,
-};
+static void sun4v_virq_disable(unsigned int virt_irq)
+{
+       unsigned long dev_handle, dev_ino;
+       int err;
+
+       dev_handle = virt_irq_table[virt_irq].dev_handle;
+       dev_ino = virt_irq_table[virt_irq].dev_ino;
+
+       err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+                                   HV_INTR_DISABLED);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+                      "HV_INTR_DISABLED): err(%d)\n",
+                      dev_handle, dev_ino, err);
+}
+
+static void sun4v_virq_eoi(unsigned int virt_irq)
+{
+       struct irq_desc *desc = irq_desc + virt_irq;
+       unsigned long dev_handle, dev_ino;
+       int err;
+
+       if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+               return;
+
+       dev_handle = virt_irq_table[virt_irq].dev_handle;
+       dev_ino = virt_irq_table[virt_irq].dev_ino;
 
-static struct hw_interrupt_type sun4u_irq_ack = {
-       .typename       = "sun4u+ack",
+       err = sun4v_vintr_set_state(dev_handle, dev_ino,
+                                   HV_INTR_STATE_IDLE);
+       if (err != HV_EOK)
+               printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+                      "HV_INTR_STATE_IDLE): err(%d)\n",
+                      dev_handle, dev_ino, err);
+}
+
+static struct irq_chip sun4u_irq = {
+       .typename       = "sun4u",
        .enable         = sun4u_irq_enable,
        .disable        = sun4u_irq_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4u_irq_end,
+       .eoi            = sun4u_irq_eoi,
+       .set_affinity   = sun4u_set_affinity,
 };
 
-static struct hw_interrupt_type sun4v_irq = {
+static struct irq_chip sun4v_irq = {
        .typename       = "sun4v",
        .enable         = sun4v_irq_enable,
        .disable        = sun4v_irq_disable,
-       .end            = sun4v_irq_end,
+       .eoi            = sun4v_irq_eoi,
+       .set_affinity   = sun4v_set_affinity,
 };
 
-static struct hw_interrupt_type sun4v_irq_ack = {
-       .typename       = "sun4v+ack",
-       .enable         = sun4v_irq_enable,
-       .disable        = sun4v_irq_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4v_irq_end,
+static struct irq_chip sun4v_virq = {
+       .typename       = "vsun4v",
+       .enable         = sun4v_virq_enable,
+       .disable        = sun4v_virq_disable,
+       .eoi            = sun4v_virq_eoi,
+       .set_affinity   = sun4v_virt_set_affinity,
 };
 
+static void pre_flow_handler(unsigned int virt_irq,
+                                     struct irq_desc *desc)
+{
+       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
+
+       data->pre_handler(ino, data->arg1, data->arg2);
+
+       handle_fasteoi_irq(virt_irq, desc);
+}
+
 void irq_install_pre_handler(int virt_irq,
                             void (*func)(unsigned int, void *, void *),
                             void *arg1, void *arg2)
 {
-       irq_desc_t *desc = irq_desc + virt_irq;
-       struct irq_handler_data *data = desc->handler_data;
+       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+       struct irq_desc *desc = irq_desc + virt_irq;
 
        data->pre_handler = func;
-       data->pre_handler_arg1 = arg1;
-       data->pre_handler_arg2 = arg2;
+       data->arg1 = arg1;
+       data->arg2 = arg2;
 
-       if (desc->chip == &sun4u_irq_ack ||
-           desc->chip == &sun4v_irq_ack)
-               return;
-
-       desc->chip = (desc->chip == &sun4u_irq ?
-                     &sun4u_irq_ack : &sun4v_irq_ack);
+       desc->handle_irq = pre_flow_handler;
 }
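
A bus driver that needs per-INO work ahead of the normal flow would hook
in roughly as below (names hypothetical; only the irq_install_pre_handler()
signature is from this file):

	static void my_bus_pre_handler(unsigned int ino, void *arg1, void *arg2)
	{
		/* Called from pre_flow_handler() before handle_fasteoi_irq(),
		 * e.g. to sync bus controller state for this INO.
		 */
	}

	irq_install_pre_handler(virt_irq, my_bus_pre_handler, cookie1, cookie2);
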
 
 unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 {
        struct ino_bucket *bucket;
        struct irq_handler_data *data;
-       irq_desc_t *desc;
+       unsigned int virt_irq;
        int ino;
 
        BUG_ON(tlb_type == hypervisor);
 
-       ino = (upa_readl(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
+       ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
        bucket = &ivector_table[ino];
-       if (!bucket->virt_irq) {
-               bucket->virt_irq = virt_irq_alloc(__irq(bucket));
-               irq_desc[bucket->virt_irq].chip = &sun4u_irq;
+       virt_irq = bucket_get_virt_irq(__pa(bucket));
+       if (!virt_irq) {
+               virt_irq = virt_irq_alloc(0, ino);
+               bucket_set_virt_irq(__pa(bucket), virt_irq);
+               set_irq_chip_and_handler_name(virt_irq,
+                                             &sun4u_irq,
+                                             handle_fasteoi_irq,
+                                             "IVEC");
        }
 
-       desc = irq_desc + bucket->virt_irq;
-       if (unlikely(desc->handler_data))
+       data = get_irq_chip_data(virt_irq);
+       if (unlikely(data))
                goto out;
 
        data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
@@ -446,33 +561,36 @@ unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
                prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
                prom_halt();
        }
-       desc->handler_data = data;
+       set_irq_chip_data(virt_irq, data);
 
        data->imap  = imap;
        data->iclr  = iclr;
 
 out:
-       return bucket->virt_irq;
+       return virt_irq;
 }
 
-unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+static unsigned int sun4v_build_common(unsigned long sysino,
+                                      struct irq_chip *chip)
 {
        struct ino_bucket *bucket;
        struct irq_handler_data *data;
-       unsigned long sysino;
-       irq_desc_t *desc;
+       unsigned int virt_irq;
 
        BUG_ON(tlb_type != hypervisor);
 
-       sysino = sun4v_devino_to_sysino(devhandle, devino);
        bucket = &ivector_table[sysino];
-       if (!bucket->virt_irq) {
-               bucket->virt_irq = virt_irq_alloc(__irq(bucket));
-               irq_desc[bucket->virt_irq].chip = &sun4v_irq;
+       virt_irq = bucket_get_virt_irq(__pa(bucket));
+       if (!virt_irq) {
+               virt_irq = virt_irq_alloc(0, sysino);
+               bucket_set_virt_irq(__pa(bucket), virt_irq);
+               set_irq_chip_and_handler_name(virt_irq, chip,
+                                             handle_fasteoi_irq,
+                                             "IVEC");
        }
 
-       desc = irq_desc + bucket->virt_irq;
-       if (unlikely(desc->handler_data))
+       data = get_irq_chip_data(virt_irq);
+       if (unlikely(data))
                goto out;
 
        data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
@@ -480,7 +598,7 @@ unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
                prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
                prom_halt();
        }
-       desc->handler_data = data;
+       set_irq_chip_data(virt_irq, data);
 
        /* Catch accidental accesses to these things.  IMAP/ICLR handling
         * is done by hypervisor calls on sun4v platforms, not by direct
@@ -490,63 +608,83 @@ unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
        data->iclr = ~0UL;
 
 out:
-       return bucket->virt_irq;
+       return virt_irq;
 }
 
-void hw_resend_irq(struct hw_interrupt_type *handler, unsigned int virt_irq)
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       unsigned long pstate;
-       unsigned int *ent;
+       unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
 
-       __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-       __asm__ __volatile__("wrpr %0, %1, %%pstate"
-                            : : "r" (pstate), "i" (PSTATE_IE));
-       ent = irq_work(smp_processor_id());
-       bucket->irq_chain = *ent;
-       *ent = __irq(bucket);
-       set_softint(1 << PIL_DEVICE_IRQ);
-       __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate));
+       return sun4v_build_common(sysino, &sun4v_irq);
 }
 
-void ack_bad_irq(unsigned int virt_irq)
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 {
-       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
-       unsigned int ino = 0xdeadbeef;
+       struct irq_handler_data *data;
+       unsigned long hv_err, cookie;
+       struct ino_bucket *bucket;
+       struct irq_desc *desc;
+       unsigned int virt_irq;
 
-       if (bucket)
-               ino = bucket - &ivector_table[0];
+       bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
+       if (unlikely(!bucket))
+               return 0;
+       __flush_dcache_range((unsigned long) bucket,
+                            ((unsigned long) bucket +
+                             sizeof(struct ino_bucket)));
 
-       printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
-              ino, virt_irq);
-}
+       virt_irq = virt_irq_alloc(devhandle, devino);
+       bucket_set_virt_irq(__pa(bucket), virt_irq);
 
-#ifndef CONFIG_SMP
-extern irqreturn_t timer_interrupt(int, void *, struct pt_regs *);
+       set_irq_chip_and_handler_name(virt_irq, &sun4v_virq,
+                                     handle_fasteoi_irq,
+                                     "IVEC");
 
-void timer_irq(int irq, struct pt_regs *regs)
-{
-       unsigned long clr_mask = 1 << irq;
-       unsigned long tick_mask = tick_ops->softint_mask;
+       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+       if (unlikely(!data))
+               return 0;
 
-       if (get_softint() & tick_mask) {
-               irq = 0;
-               clr_mask = tick_mask;
+       /* In order to make the LDC channel startup sequence easier,
+        * especially wrt. locking, we do not let request_irq() enable
+        * the interrupt.
+        */
+       desc = irq_desc + virt_irq;
+       desc->status |= IRQ_NOAUTOEN;
+
+       set_irq_chip_data(virt_irq, data);
+
+       /* Catch accidental accesses to these things.  IMAP/ICLR handling
+        * is done by hypervisor calls on sun4v platforms, not by direct
+        * register accesses.
+        */
+       data->imap = ~0UL;
+       data->iclr = ~0UL;
+
+       cookie = ~__pa(bucket);
+       hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+       if (hv_err) {
+               prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
+                           "err=%lu\n", devhandle, devino, hv_err);
+               prom_halt();
        }
-       clear_softint(clr_mask);
 
-       irq_enter();
+       return virt_irq;
+}
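
Because the descriptor is marked IRQ_NOAUTOEN, request_irq() on the
returned number leaves the interrupt off; an LDC-style consumer enables
it explicitly once the channel is ready (a sketch, names hypothetical):

	virt_irq = sun4v_build_virq(devhandle, devino);
	err = request_irq(virt_irq, ldc_rx_handler, 0, "LDC RX", lp);
	if (err)
		return err;
	/* ... finish channel startup under the LDC lock ... */
	enable_irq(virt_irq);	/* IRQ_NOAUTOEN kept it disabled until now */
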
 
-       kstat_this_cpu.irqs[0]++;
-       timer_interrupt(irq, NULL, regs);
+void ack_bad_irq(unsigned int virt_irq)
+{
+       unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 
-       irq_exit();
+       if (!ino)
+               ino = 0xdeadbeef;
+
+       printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
+              ino, virt_irq);
 }
-#endif
 
 void handler_irq(int irq, struct pt_regs *regs)
 {
-       struct ino_bucket *bucket;
+       unsigned long pstate, bucket_pa;
        struct pt_regs *old_regs;
 
        clear_softint(1 << irq);
@@ -554,21 +692,57 @@ void handler_irq(int irq, struct pt_regs *regs)
        old_regs = set_irq_regs(regs);
        irq_enter();
 
-       /* Sliiiick... */
-       bucket = __bucket(xchg32(irq_work(smp_processor_id()), 0));
-       while (bucket) {
-               struct ino_bucket *next = __bucket(bucket->irq_chain);
+       /* Grab an atomic snapshot of the pending IVECs.  */
+       __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
+                            "wrpr      %0, %3, %%pstate\n\t"
+                            "ldx       [%2], %1\n\t"
+                            "stx       %%g0, [%2]\n\t"
+                            "wrpr      %0, 0x0, %%pstate\n\t"
+                            : "=&r" (pstate), "=&r" (bucket_pa)
+                            : "r" (irq_work_pa(smp_processor_id())),
+                              "i" (PSTATE_IE)
+                            : "memory");
+
+       while (bucket_pa) {
+               struct irq_desc *desc;
+               unsigned long next_pa;
+               unsigned int virt_irq;
+
+               next_pa = bucket_get_chain_pa(bucket_pa);
+               virt_irq = bucket_get_virt_irq(bucket_pa);
+               bucket_clear_chain_pa(bucket_pa);
+
+               desc = irq_desc + virt_irq;
 
-               bucket->irq_chain = 0;
-               __do_IRQ(bucket->virt_irq);
+               desc->handle_irq(virt_irq, desc);
 
-               bucket = next;
+               bucket_pa = next_pa;
        }
 
        irq_exit();
        set_irq_regs(old_regs);
 }
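
The inline assembler above is, in effect, the following C, kept in
assembler so the load/store pair runs with PSTATE_IE clear and an IVEC
trap cannot slip in between reading and clearing the list head
(equivalent sketch only):

	unsigned long flags, bucket_pa;
	unsigned long *head = irq_work_pa(smp_processor_id());

	local_irq_save(flags);
	bucket_pa = *head;	/* PA of first pending ino_bucket, or 0 */
	*head = 0UL;		/* the per-cpu worklist is now empty */
	local_irq_restore(flags);
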
 
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+       unsigned int irq;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&irq_desc[irq].lock, flags);
+               if (irq_desc[irq].action &&
+                   !(irq_desc[irq].status & IRQ_PER_CPU)) {
+                       if (irq_desc[irq].chip->set_affinity)
+                               irq_desc[irq].chip->set_affinity(irq,
+                                       irq_desc[irq].affinity);
+               }
+               spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+       }
+}
+#endif
+
 struct sun5_timer {
        u64     count0;
        u64     limit0;
@@ -582,7 +756,7 @@ static u64 prom_limit0, prom_limit1;
 static void map_prom_timers(void)
 {
        struct device_node *dp;
-       unsigned int *addr;
+       const unsigned int *addr;
 
        /* PROM timer node hangs out in the top level of device siblings... */
        dp = of_find_node_by_path("/");
@@ -642,12 +816,23 @@ void init_irqwork_curcpu(void)
 {
        int cpu = hard_smp_processor_id();
 
-       trap_block[cpu].irq_worklist = 0;
+       trap_block[cpu].irq_worklist_pa = 0UL;
 }
 
-static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type)
+/* Please be very careful with register_one_mondo() and
+ * sun4v_register_mondo_queues().
+ *
+ * On SMP this gets invoked from the CPU trampoline before
+ * the cpu has fully taken over the trap table from OBP,
+ * and its kernel stack + %g6 thread register state is
+ * not fully cooked yet.
+ *
+ * Therefore you cannot make any OBP calls, not even prom_printf,
+ * from these two routines.
+ */
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
 {
-       unsigned long num_entries = 128;
+       unsigned long num_entries = (qmask + 1) / 64;
        unsigned long status;
 
        status = sun4v_cpu_qconf(type, paddr, num_entries);
@@ -658,62 +843,53 @@ static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type
        }
 }
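
Each mondo queue entry is 64 bytes and qmask is the queue size in bytes
minus one, so the entry count falls out directly: the fixed 128-entry
queues this replaces correspond to qmask = 0x1fff, i.e.
(0x1fff + 1) / 64 = 8192 / 64 = 128 entries.
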
 
-static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
+void __cpuinit sun4v_register_mondo_queues(int this_cpu)
 {
        struct trap_per_cpu *tb = &trap_block[this_cpu];
 
-       register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO);
-       register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO);
-       register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR);
-       register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR);
+       register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
+                          tb->cpu_mondo_qmask);
+       register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
+                          tb->dev_mondo_qmask);
+       register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
+                          tb->resum_qmask);
+       register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
+                          tb->nonresum_qmask);
 }
 
-static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem)
+static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
 {
-       void *page;
-
-       if (use_bootmem)
-               page = alloc_bootmem_low_pages(PAGE_SIZE);
-       else
-               page = (void *) get_zeroed_page(GFP_ATOMIC);
-
-       if (!page) {
+       unsigned long size = PAGE_ALIGN(qmask + 1);
+       void *p = __alloc_bootmem(size, size, 0);
+       if (!p) {
                prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
                prom_halt();
        }
 
-       *pa_ptr = __pa(page);
+       *pa_ptr = __pa(p);
 }
 
-static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem)
+static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
 {
-       void *page;
+       unsigned long size = PAGE_ALIGN(qmask + 1);
+       void *p = __alloc_bootmem(size, size, 0);
 
-       if (use_bootmem)
-               page = alloc_bootmem_low_pages(PAGE_SIZE);
-       else
-               page = (void *) get_zeroed_page(GFP_ATOMIC);
-
-       if (!page) {
+       if (!p) {
                prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
                prom_halt();
        }
 
-       *pa_ptr = __pa(page);
+       *pa_ptr = __pa(p);
 }
 
-static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
 {
 #ifdef CONFIG_SMP
        void *page;
 
        BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
 
-       if (use_bootmem)
-               page = alloc_bootmem_low_pages(PAGE_SIZE);
-       else
-               page = (void *) get_zeroed_page(GFP_ATOMIC);
-
+       page = alloc_bootmem_pages(PAGE_SIZE);
        if (!page) {
                prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
                prom_halt();
@@ -724,30 +900,27 @@ static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_
 #endif
 }
 
-/* Allocate and register the mondo and error queues for this cpu.  */
-void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
+/* Allocate mondo and error queues for all possible cpus.  */
+static void __init sun4v_init_mondo_queues(void)
 {
-       struct trap_per_cpu *tb = &trap_block[cpu];
+       int cpu;
 
-       if (alloc) {
-               alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem);
-               alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem);
-               alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem);
-               alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem);
-               alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem);
-               alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem);
+       for_each_possible_cpu(cpu) {
+               struct trap_per_cpu *tb = &trap_block[cpu];
 
-               init_cpu_send_mondo_info(tb, use_bootmem);
-       }
+               alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+               alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+               alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
+               alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+               alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+               alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
+                              tb->nonresum_qmask);
 
-       if (load) {
-               if (cpu != hard_smp_processor_id()) {
-                       prom_printf("SUN4V: init mondo on cpu %d not %d\n",
-                                   cpu, hard_smp_processor_id());
-                       prom_halt();
-               }
-               sun4v_register_mondo_queues(cpu);
+               init_cpu_send_mondo_info(tb);
        }
+
+       /* Load up the boot cpu's entries.  */
+       sun4v_register_mondo_queues(hard_smp_processor_id());
 }
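
sun4v_register_mondo_queues() is made non-static above precisely so that a
secondary CPU can load its own, already-allocated queue registers as it
boots; conceptually (hypothetical call site in the CPU trampoline path,
where, per the comment above register_one_mondo(), no OBP calls may be
made):

	if (tlb_type == hypervisor)
		sun4v_register_mondo_queues(hard_smp_processor_id());
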
 
 static struct irqaction timer_irq_action = {
@@ -757,12 +930,24 @@ static struct irqaction timer_irq_action = {
 /* Only invoked on boot processor. */
 void __init init_IRQ(void)
 {
+       unsigned long size;
+
        map_prom_timers();
        kill_prom_timer();
-       memset(&ivector_table[0], 0, sizeof(ivector_table));
+
+       size = sizeof(struct ino_bucket) * NUM_IVECS;
+       ivector_table = alloc_bootmem(size);
+       if (!ivector_table) {
+               prom_printf("Fatal error, cannot allocate ivector_table\n");
+               prom_halt();
+       }
+       __flush_dcache_range((unsigned long) ivector_table,
+                            ((unsigned long) ivector_table) + size);
+
+       ivector_table_pa = __pa(ivector_table);
 
        if (tlb_type == hypervisor)
-               sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
+               sun4v_init_mondo_queues();
 
        /* We need to clear any IRQ's pending in the soft interrupt
         * registers, a spurious one could be left around from the