powerpc/xics: Factor out cpu joining/unjoining the GIQ
[safe/jmp/linux-2.6] / arch / powerpc / platforms / pseries / xics.c
index c7f0442..0bb5533 100644 (file)
@@ -8,31 +8,30 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
+
 #include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/kernel.h>
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
-#include <linux/signal.h>
 #include <linux/init.h>
-#include <linux/gfp.h>
 #include <linux/radix-tree.h>
 #include <linux/cpu.h>
+#include <linux/of.h>
+
 #include <asm/firmware.h>
-#include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/smp.h>
 #include <asm/rtas.h>
 #include <asm/hvcall.h>
 #include <asm/machdep.h>
-#include <asm/i8259.h>
 
 #include "xics.h"
+#include "plpar_wrappers.h"
 
-/* This is used to map real irq numbers to virtual */
-static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
+static struct irq_host *xics_host;
 
 #define XICS_IPI               2
 #define XICS_IRQ_SPURIOUS      0
@@ -46,6 +45,20 @@ static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
  */
 #define IPI_PRIORITY           4
 
+static unsigned int default_server = 0xFF;
+static unsigned int default_distrib_server = 0;
+static unsigned int interrupt_server_size = 8;
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+
+/* Direct hardware low level accessors */
+
+/* The part of the interrupt presentation layer that we care about */
 struct xics_ipl {
        union {
                u32 word;
@@ -64,40 +77,25 @@ struct xics_ipl {
 
 static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
 
-static int xics_irq_8259_cascade = 0;
-static int xics_irq_8259_cascade_real = 0;
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-
-/* Direct HW low level accessors */
-
-
-static inline int direct_xirr_info_get(int n_cpu)
+static inline unsigned int direct_xirr_info_get(void)
 {
-       return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
+       int cpu = smp_processor_id();
+
+       return in_be32(&xics_per_cpu[cpu]->xirr.word);
 }
 
-static inline void direct_xirr_info_set(int n_cpu, int value)
+static inline void direct_xirr_info_set(unsigned int value)
 {
-       out_be32(&xics_per_cpu[n_cpu]->xirr.word, value);
+       int cpu = smp_processor_id();
+
+       out_be32(&xics_per_cpu[cpu]->xirr.word, value);
 }
 
-static inline void direct_cppr_info(int n_cpu, u8 value)
+static inline void direct_cppr_info(u8 value)
 {
-       out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value);
+       int cpu = smp_processor_id();
+
+       out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
 }
 
 static inline void direct_qirr_info(int n_cpu, u8 value)
@@ -108,29 +106,7 @@ static inline void direct_qirr_info(int n_cpu, u8 value)
 
 /* LPAR low level accessors */
 
-
-static inline long plpar_eoi(unsigned long xirr)
-{
-       return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-       return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-       return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret)
-{
-       unsigned long dummy;
-       return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
-}
-
-static inline int lpar_xirr_info_get(int n_cpu)
+static inline unsigned int lpar_xirr_info_get(void)
 {
        unsigned long lpar_rc;
        unsigned long return_value;
@@ -138,21 +114,20 @@ static inline int lpar_xirr_info_get(int n_cpu)
        lpar_rc = plpar_xirr(&return_value);
        if (lpar_rc != H_SUCCESS)
                panic(" bad return code xirr - rc = %lx \n", lpar_rc);
-       return (int)return_value;
+       return (unsigned int)return_value;
 }
 
-static inline void lpar_xirr_info_set(int n_cpu, int value)
+static inline void lpar_xirr_info_set(unsigned int value)
 {
        unsigned long lpar_rc;
-       unsigned long val64 = value & 0xffffffff;
 
-       lpar_rc = plpar_eoi(val64);
+       lpar_rc = plpar_eoi(value);
        if (lpar_rc != H_SUCCESS)
-               panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc,
-                     val64);
+               panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
+                     value);
 }
 
-static inline void lpar_cppr_info(int n_cpu, u8 value)
+static inline void lpar_cppr_info(u8 value)
 {
        unsigned long lpar_rc;
 
@@ -171,54 +146,57 @@ static inline void lpar_qirr_info(int n_cpu , u8 value)
 }
 
 
-/* High level handlers and init code */
-
+/* Interface to generic irq subsystem */
 
 #ifdef CONFIG_SMP
-static int get_irq_server(unsigned int irq)
+static int get_irq_server(unsigned int virq, unsigned int strict_check)
 {
-       unsigned int server;
+       int server;
        /* For the moment only implement delivery to all cpus or one cpu */
-       cpumask_t cpumask = irq_desc[irq].affinity;
+       cpumask_t cpumask = irq_desc[virq].affinity;
        cpumask_t tmp = CPU_MASK_NONE;
 
        if (!distribute_irqs)
                return default_server;
 
-       if (cpus_equal(cpumask, CPU_MASK_ALL)) {
-               server = default_distrib_server;
-       } else {
+       if (!cpus_equal(cpumask, CPU_MASK_ALL)) {
                cpus_and(tmp, cpu_online_map, cpumask);
 
-               if (cpus_empty(tmp))
-                       server = default_distrib_server;
-               else
-                       server = get_hard_smp_processor_id(first_cpu(tmp));
+               server = first_cpu(tmp);
+
+               if (server < NR_CPUS)
+                       return get_hard_smp_processor_id(server);
+
+               if (strict_check)
+                       return -1;
        }
 
-       return server;
+       if (cpus_equal(cpu_online_map, cpu_present_map))
+               return default_distrib_server;
 
+       return default_server;
 }
 #else
-static int get_irq_server(unsigned int irq)
+static int get_irq_server(unsigned int virq, unsigned int strict_check)
 {
        return default_server;
 }
 #endif
 
-
 static void xics_unmask_irq(unsigned int virq)
 {
        unsigned int irq;
        int call_status;
-       unsigned int server;
+       int server;
 
-       irq = virt_irq_to_real(irq_offset_down(virq));
-       WARN_ON(irq == NO_IRQ);
-       if (irq == XICS_IPI || irq == NO_IRQ)
+       pr_debug("xics: unmask virq %d\n", virq);
+
+       irq = (unsigned int)irq_map[virq].hwirq;
+       pr_debug(" -> map to hwirq 0x%x\n", irq);
+       if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
                return;
 
-       server = get_irq_server(virq);
+       server = get_irq_server(virq, 0);
 
        call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
                                DEFAULT_PRIORITY);
@@ -238,10 +216,16 @@ static void xics_unmask_irq(unsigned int virq)
        }
 }
 
+static unsigned int xics_startup(unsigned int virq)
+{
+       /* unmask it */
+       xics_unmask_irq(virq);
+       return 0;
+}
+
 static void xics_mask_real_irq(unsigned int irq)
 {
        int call_status;
-       unsigned int server;
 
        if (irq == XICS_IPI)
                return;
@@ -253,9 +237,9 @@ static void xics_mask_real_irq(unsigned int irq)
                return;
        }
 
-       server = get_irq_server(irq);
        /* Have to set XIVE to 0xff to be able to remove a slot */
-       call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff);
+       call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq,
+                               default_server, 0xff);
        if (call_status != 0) {
                printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)"
                       " returned %d\n", irq, call_status);
@@ -267,99 +251,249 @@ static void xics_mask_irq(unsigned int virq)
 {
        unsigned int irq;
 
-       irq = virt_irq_to_real(irq_offset_down(virq));
-       WARN_ON(irq == NO_IRQ);
-       if (irq != NO_IRQ)
-               xics_mask_real_irq(irq);
+       pr_debug("xics: mask virq %d\n", virq);
+
+       irq = (unsigned int)irq_map[virq].hwirq;
+       if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+               return;
+       xics_mask_real_irq(irq);
 }
 
-static void xics_set_irq_revmap(unsigned int virq)
+static void xics_mask_unknown_vec(unsigned int vec)
 {
-       unsigned int irq;
+       printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
+       xics_mask_real_irq(vec);
+}
 
-       irq = irq_offset_down(virq);
-       if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
-                             &virt_irq_to_real_map[irq]) == -ENOMEM)
-               printk(KERN_CRIT "Out of memory creating real -> virtual"
-                      " IRQ mapping for irq %u (real 0x%x)\n",
-                      virq, virt_irq_to_real(irq));
+static inline unsigned int xics_xirr_vector(unsigned int xirr)
+{
+       /*
+        * The top byte is the old cppr, to be restored on EOI.
+        * The remaining 24 bits are the vector.
+        */
+       return xirr & 0x00ffffff;
 }
 
-static unsigned int xics_startup(unsigned int virq)
+static unsigned int xics_get_irq_direct(void)
 {
-       xics_set_irq_revmap(virq);
-       xics_unmask_irq(virq);
-       return 0;
+       unsigned int xirr = direct_xirr_info_get();
+       unsigned int vec = xics_xirr_vector(xirr);
+       unsigned int irq;
+
+       if (vec == XICS_IRQ_SPURIOUS)
+               return NO_IRQ;
+
+       irq = irq_radix_revmap_lookup(xics_host, vec);
+       if (likely(irq != NO_IRQ))
+               return irq;
+
+       /* We don't have a linux mapping, so have rtas mask it. */
+       xics_mask_unknown_vec(vec);
+
+       /* We might learn about it later, so EOI it */
+       direct_xirr_info_set(xirr);
+       return NO_IRQ;
 }
 
-static unsigned int real_irq_to_virt(unsigned int real_irq)
+static unsigned int xics_get_irq_lpar(void)
 {
-       unsigned int *ptr;
+       unsigned int xirr = lpar_xirr_info_get();
+       unsigned int vec = xics_xirr_vector(xirr);
+       unsigned int irq;
 
-       ptr = radix_tree_lookup(&irq_map, real_irq);
-       if (ptr == NULL)
+       if (vec == XICS_IRQ_SPURIOUS)
                return NO_IRQ;
-       return ptr - virt_irq_to_real_map;
+
+       irq = irq_radix_revmap_lookup(xics_host, vec);
+       if (likely(irq != NO_IRQ))
+               return irq;
+
+       /* We don't have a linux mapping, so have RTAS mask it. */
+       xics_mask_unknown_vec(vec);
+
+       /* We might learn about it later, so EOI it */
+       lpar_xirr_info_set(xirr);
+       return NO_IRQ;
 }
 
-static void xics_eoi_direct(unsigned int irq)
+static void xics_eoi_direct(unsigned int virq)
 {
-       int cpu = smp_processor_id();
+       unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
        iosync();
-       direct_xirr_info_set(cpu, ((0xff << 24) |
-                                  (virt_irq_to_real(irq_offset_down(irq)))));
+       direct_xirr_info_set((0xff << 24) | irq);
 }
 
-
-static void xics_eoi_lpar(unsigned int irq)
+static void xics_eoi_lpar(unsigned int virq)
 {
-       int cpu = smp_processor_id();
+       unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
        iosync();
-       lpar_xirr_info_set(cpu, ((0xff << 24) |
-                                (virt_irq_to_real(irq_offset_down(irq)))));
-
+       lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static inline int xics_remap_irq(int vec)
+static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 {
-       int irq;
+       unsigned int irq;
+       int status;
+       int xics_status[2];
+       int irq_server;
 
-       vec &= 0x00ffffff;
+       irq = (unsigned int)irq_map[virq].hwirq;
+       if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+               return;
 
-       if (vec == XICS_IRQ_SPURIOUS)
-               return NO_IRQ;
+       status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
-       irq = real_irq_to_virt(vec);
-       if (irq == NO_IRQ)
-               irq = real_irq_to_virt_slowpath(vec);
-       if (likely(irq != NO_IRQ))
-               return irq_offset_up(irq);
+       if (status) {
+               printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
+                      "returns %d\n", irq, status);
+               return;
+       }
 
-       printk(KERN_ERR "Interrupt %u (real) is invalid,"
-              " disabling it.\n", vec);
-       xics_mask_real_irq(vec);
-       return NO_IRQ;
+       /*
+        * For the moment only implement delivery to all cpus or one cpu.
+        * Get current irq_server for the given irq
+        */
+       irq_server = get_irq_server(virq, 1);
+       if (irq_server == -1) {
+               char cpulist[128];
+               cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+               printk(KERN_WARNING "xics_set_affinity: No online cpus in "
+                               "the mask %s for irq %d\n", cpulist, virq);
+               return;
+       }
+
+       status = rtas_call(ibm_set_xive, 3, 1, NULL,
+                               irq, irq_server, xics_status[1]);
+
+       if (status) {
+               printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
+                      "returns %d\n", irq, status);
+               return;
+       }
 }
 
-static int xics_get_irq_direct(struct pt_regs *regs)
+static struct irq_chip xics_pic_direct = {
+       .typename = " XICS     ",
+       .startup = xics_startup,
+       .mask = xics_mask_irq,
+       .unmask = xics_unmask_irq,
+       .eoi = xics_eoi_direct,
+       .set_affinity = xics_set_affinity
+};
+
+static struct irq_chip xics_pic_lpar = {
+       .typename = " XICS     ",
+       .startup = xics_startup,
+       .mask = xics_mask_irq,
+       .unmask = xics_unmask_irq,
+       .eoi = xics_eoi_lpar,
+       .set_affinity = xics_set_affinity
+};
+
+
+/* Interface to arch irq controller subsystem layer */
+
+/* Points to the irq_chip we're actually using */
+static struct irq_chip *xics_irq_chip;
+
+static int xics_host_match(struct irq_host *h, struct device_node *node)
 {
-       unsigned int cpu = smp_processor_id();
+       /* IBM machines have interrupt parents of various funky types for things
+        * like vdevices, events, etc... The trick we use here is to match
+        * everything here except the legacy 8259 which is compatible "chrp,iic"
+        */
+       return !of_device_is_compatible(node, "chrp,iic");
+}
 
-       return xics_remap_irq(direct_xirr_info_get(cpu));
+static int xics_host_map(struct irq_host *h, unsigned int virq,
+                        irq_hw_number_t hw)
+{
+       pr_debug("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+       /* Insert the interrupt mapping into the radix tree for fast lookup */
+       irq_radix_revmap_insert(xics_host, virq, hw);
+
+       get_irq_desc(virq)->status |= IRQ_LEVEL;
+       set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
+       return 0;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+                          u32 *intspec, unsigned int intsize,
+                          irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+       /* Current xics implementation translates everything
+        * to level. It is not technically right for MSIs but this
+        * is irrelevant at this point. We might get smarter in the future
+        */
+       *out_hwirq = intspec[0];
+       *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+       return 0;
 }
 
-static int xics_get_irq_lpar(struct pt_regs *regs)
+static struct irq_host_ops xics_host_ops = {
+       .match = xics_host_match,
+       .map = xics_host_map,
+       .xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
 {
-       unsigned int cpu = smp_processor_id();
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               xics_irq_chip = &xics_pic_lpar;
+       else
+               xics_irq_chip = &xics_pic_direct;
 
-       return xics_remap_irq(lpar_xirr_info_get(cpu));
+       xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
+                                  XICS_IRQ_SPURIOUS);
+       BUG_ON(xics_host == NULL);
+       irq_set_default_host(xics_host);
 }
 
+
+/* Inter-processor interrupt support */
+
 #ifdef CONFIG_SMP
+/*
+ * XICS only has a single IPI, so encode the messages per CPU
+ */
+struct xics_ipi_struct {
+        unsigned long value;
+       } ____cacheline_aligned;
+
+static struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
 
-static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs)
+static inline void smp_xics_do_message(int cpu, int msg)
+{
+       set_bit(msg, &xics_ipi_message[cpu].value);
+       mb();
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               lpar_qirr_info(cpu, IPI_PRIORITY);
+       else
+               direct_qirr_info(cpu, IPI_PRIORITY);
+}
+
+void smp_xics_message_pass(int target, int msg)
+{
+       unsigned int i;
+
+       if (target < NR_CPUS) {
+               smp_xics_do_message(target, msg);
+       } else {
+               for_each_online_cpu(i) {
+                       if (target == MSG_ALL_BUT_SELF
+                           && i == smp_processor_id())
+                               continue;
+                       smp_xics_do_message(i, msg);
+               }
+       }
+}
+
+static irqreturn_t xics_ipi_dispatch(int cpu)
 {
        WARN_ON(cpu_is_offline(cpu));
 
@@ -367,335 +501,288 @@ static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs)
                if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
                                       &xics_ipi_message[cpu].value)) {
                        mb();
-                       smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
+                       smp_message_recv(PPC_MSG_CALL_FUNCTION);
                }
                if (test_and_clear_bit(PPC_MSG_RESCHEDULE,
                                       &xics_ipi_message[cpu].value)) {
                        mb();
-                       smp_message_recv(PPC_MSG_RESCHEDULE, regs);
+                       smp_message_recv(PPC_MSG_RESCHEDULE);
                }
-#if 0
-               if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK,
+               if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE,
                                       &xics_ipi_message[cpu].value)) {
                        mb();
-                       smp_message_recv(PPC_MSG_MIGRATE_TASK, regs);
+                       smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
                }
-#endif
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
                if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
                                       &xics_ipi_message[cpu].value)) {
                        mb();
-                       smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs);
+                       smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
                }
 #endif
        }
        return IRQ_HANDLED;
 }
 
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
 {
        int cpu = smp_processor_id();
 
        direct_qirr_info(cpu, 0xff);
 
-       return xics_ipi_dispatch(cpu, regs);
+       return xics_ipi_dispatch(cpu);
 }
 
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
 {
        int cpu = smp_processor_id();
 
        lpar_qirr_info(cpu, 0xff);
 
-       return xics_ipi_dispatch(cpu, regs);
+       return xics_ipi_dispatch(cpu);
 }
 
-void xics_cause_IPI(int cpu)
+static void xics_request_ipi(void)
 {
-       if (firmware_has_feature(FW_FEATURE_LPAR))
-               lpar_qirr_info(cpu, IPI_PRIORITY);
-       else
-               direct_qirr_info(cpu, IPI_PRIORITY);
-}
+       unsigned int ipi;
+       int rc;
 
-#endif /* CONFIG_SMP */
+       ipi = irq_create_mapping(xics_host, XICS_IPI);
+       BUG_ON(ipi == NO_IRQ);
 
-static void xics_set_cpu_priority(int cpu, unsigned char cppr)
-{
+       /*
+        * IPIs are marked IRQF_DISABLED as they must run with irqs
+        * disabled
+        */
+       set_irq_handler(ipi, handle_percpu_irq);
        if (firmware_has_feature(FW_FEATURE_LPAR))
-               lpar_cppr_info(cpu, cppr);
+               rc = request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED,
+                               "IPI", NULL);
        else
-               direct_cppr_info(cpu, cppr);
-       iosync();
+               rc = request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED,
+                               "IPI", NULL);
+       BUG_ON(rc);
 }
 
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+int __init smp_xics_probe(void)
 {
-       unsigned int irq;
-       int status;
-       int xics_status[2];
-       unsigned long newmask;
-       cpumask_t tmp = CPU_MASK_NONE;
+       xics_request_ipi();
 
-       irq = virt_irq_to_real(irq_offset_down(virq));
-       if (irq == XICS_IPI || irq == NO_IRQ)
-               return;
+       return cpus_weight(cpu_possible_map);
+}
 
-       status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
+#endif /* CONFIG_SMP */
 
-       if (status) {
-               printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
-                      "returns %d\n", irq, status);
-               return;
-       }
 
-       /* For the moment only implement delivery to all cpus or one cpu */
-       if (cpus_equal(cpumask, CPU_MASK_ALL)) {
-               newmask = default_distrib_server;
-       } else {
-               cpus_and(tmp, cpu_online_map, cpumask);
-               if (cpus_empty(tmp))
-                       return;
-               newmask = get_hard_smp_processor_id(first_cpu(tmp));
-       }
+/* Initialization */
 
-       status = rtas_call(ibm_set_xive, 3, 1, NULL,
-                               irq, newmask, xics_status[1]);
+static void xics_update_irq_servers(void)
+{
+       int i, j;
+       struct device_node *np;
+       u32 ilen;
+       const u32 *ireg, *isize;
+       u32 hcpuid;
 
-       if (status) {
-               printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
-                      "returns %d\n", irq, status);
+       /* Find the server numbers for the boot cpu. */
+       np = of_get_cpu_node(boot_cpuid, NULL);
+       BUG_ON(!np);
+
+       ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+       if (!ireg) {
+               of_node_put(np);
                return;
        }
-}
 
-static struct irq_chip xics_pic_direct = {
-       .typename = " XICS     ",
-       .startup = xics_startup,
-       .mask = xics_mask_irq,
-       .unmask = xics_unmask_irq,
-       .eoi = xics_eoi_direct,
-       .set_affinity = xics_set_affinity
-};
+       i = ilen / sizeof(int);
+       hcpuid = get_hard_smp_processor_id(boot_cpuid);
 
+       /* Global interrupt distribution server is specified in the last
+        * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+        * entry from this property for current boot cpu id and use it as
+        * default distribution server
+        */
+       for (j = 0; j < i; j += 2) {
+               if (ireg[j] == hcpuid) {
+                       default_server = hcpuid;
+                       default_distrib_server = ireg[j+1];
+               }
+       }
 
-static struct irq_chip xics_pic_lpar = {
-       .typename = " XICS     ",
-       .startup = xics_startup,
-       .mask = xics_mask_irq,
-       .unmask = xics_unmask_irq,
-       .eoi = xics_eoi_lpar,
-       .set_affinity = xics_set_affinity
-};
+       /* get the bit size of server numbers */
+       isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+       if (isize)
+               interrupt_server_size = *isize;
 
+       of_node_put(np);
+}
 
-void xics_setup_cpu(void)
+static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
+                                    unsigned long size)
 {
-       int cpu = smp_processor_id();
-
-       xics_set_cpu_priority(cpu, 0xff);
+       int i;
 
-       /*
-        * Put the calling processor into the GIQ.  This is really only
-        * necessary from a secondary thread as the OF start-cpu interface
-        * performs this function for us on primary threads.
-        *
-        * XXX: undo of teardown on kexec needs this too, as may hotplug
+       /* This may look gross but it's good enough for now, we don't quite
+        * have a hard -> linux processor id matching.
         */
-       rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-               (1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
+       for_each_possible_cpu(i) {
+               if (!cpu_present(i))
+                       continue;
+               if (hw_id == get_hard_smp_processor_id(i)) {
+                       xics_per_cpu[i] = ioremap(addr, size);
+                       return;
+               }
+       }
 }
 
-void xics_init_IRQ(void)
+static void __init xics_init_one_node(struct device_node *np,
+                                     unsigned int *indx)
 {
-       int i;
-       unsigned long intr_size = 0;
-       struct device_node *np;
-       uint *ireg, ilen, indx = 0;
-       unsigned long intr_base = 0;
-       struct xics_interrupt_node {
-               unsigned long addr;
-               unsigned long size;
-       } intnodes[NR_CPUS];
-       struct irq_chip *chip;
+       unsigned int ilen;
+       const u32 *ireg;
 
-       ppc64_boot_msg(0x20, "XICS Init");
-
-       ibm_get_xive = rtas_token("ibm,get-xive");
-       ibm_set_xive = rtas_token("ibm,set-xive");
-       ibm_int_on  = rtas_token("ibm,int-on");
-       ibm_int_off = rtas_token("ibm,int-off");
-
-       np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation");
-       if (!np)
-               panic("xics_init_IRQ: can't find interrupt presentation");
+       /* This code makes the theoretically broken assumption that the interrupt
+        * server numbers are the same as the hard CPU numbers.
+        * This happens to be the case so far but we are playing with fire...
+        * should be fixed one of these days. -BenH.
+        */
+       ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
 
-nextnode:
-       ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL);
+       /* Does that ever happen? We'll know soon enough... but even good old
+        * f80 does have that property ..
+        */
+       WARN_ON(ireg == NULL);
        if (ireg) {
                /*
                 * set node starting index for this node
                 */
-               indx = *ireg;
+               *indx = *ireg;
        }
-
-       ireg = (uint *)get_property(np, "reg", &ilen);
+       ireg = of_get_property(np, "reg", &ilen);
        if (!ireg)
                panic("xics_init_IRQ: can't find interrupt reg property");
 
-       while (ilen) {
-               intnodes[indx].addr = (unsigned long)*ireg++ << 32;
-               ilen -= sizeof(uint);
-               intnodes[indx].addr |= *ireg++;
-               ilen -= sizeof(uint);
-               intnodes[indx].size = (unsigned long)*ireg++ << 32;
-               ilen -= sizeof(uint);
-               intnodes[indx].size |= *ireg++;
-               ilen -= sizeof(uint);
-               indx++;
-               if (indx >= NR_CPUS) break;
+       while (ilen >= (4 * sizeof(u32))) {
+               unsigned long addr, size;
+
+               /* XXX Use proper OF parsing code here !!! */
+               addr = (unsigned long)*ireg++ << 32;
+               ilen -= sizeof(u32);
+               addr |= *ireg++;
+               ilen -= sizeof(u32);
+               size = (unsigned long)*ireg++ << 32;
+               ilen -= sizeof(u32);
+               size |= *ireg++;
+               ilen -= sizeof(u32);
+               xics_map_one_cpu(*indx, addr, size);
+               (*indx)++;
        }
+}
 
-       np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation");
-       if ((indx < NR_CPUS) && np) goto nextnode;
+void __init xics_init_IRQ(void)
+{
+       struct device_node *np;
+       u32 indx = 0;
+       int found = 0;
 
-       /* Find the server numbers for the boot cpu. */
-       for (np = of_find_node_by_type(NULL, "cpu");
-            np;
-            np = of_find_node_by_type(np, "cpu")) {
-               ireg = (uint *)get_property(np, "reg", &ilen);
-               if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
-                       ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s",
-                                                   &ilen);
-                       i = ilen / sizeof(int);
-                       if (ireg && i > 0) {
-                               default_server = ireg[0];
-                               default_distrib_server = ireg[i-1]; /* take last element */
-                       }
-                       ireg = (uint *)get_property(np,
-                                       "ibm,interrupt-server#-size", NULL);
-                       if (ireg)
-                               interrupt_server_size = *ireg;
-                       break;
-               }
-       }
-       of_node_put(np);
+       ppc64_boot_msg(0x20, "XICS Init");
 
-       intr_base = intnodes[0].addr;
-       intr_size = intnodes[0].size;
+       ibm_get_xive = rtas_token("ibm,get-xive");
+       ibm_set_xive = rtas_token("ibm,set-xive");
+       ibm_int_on  = rtas_token("ibm,int-on");
+       ibm_int_off = rtas_token("ibm,int-off");
 
-       if (firmware_has_feature(FW_FEATURE_LPAR)) {
-               ppc_md.get_irq = xics_get_irq_lpar;
-               chip = &xics_pic_lpar;
-       } else {
-#ifdef CONFIG_SMP
-               for_each_possible_cpu(i) {
-                       int hard_id;
+       for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
+               found = 1;
+               if (firmware_has_feature(FW_FEATURE_LPAR)) {
+                       of_node_put(np);
+                       break;
+                       }
+               xics_init_one_node(np, &indx);
+       }
+       if (found == 0)
+               return;
 
-                       /* FIXME: Do this dynamically! --RR */
-                       if (!cpu_present(i))
-                               continue;
+       xics_update_irq_servers();
+       xics_init_host();
 
-                       hard_id = get_hard_smp_processor_id(i);
-                       xics_per_cpu[i] = ioremap(intnodes[hard_id].addr,
-                                                 intnodes[hard_id].size);
-               }
-#else
-               xics_per_cpu[0] = ioremap(intr_base, intr_size);
-#endif /* CONFIG_SMP */
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               ppc_md.get_irq = xics_get_irq_lpar;
+       else
                ppc_md.get_irq = xics_get_irq_direct;
-               chip = &xics_pic_direct;
-
-       }
-
-       for (i = irq_offset_value(); i < NR_IRQS; ++i) {
-               /* All IRQs on XICS are level for now. MSI code may want to modify
-                * that for reporting purposes
-                */
-               get_irq_desc(i)->status |= IRQ_LEVEL;
-               set_irq_chip_and_handler(i, chip, handle_fasteoi_irq);
-       }
 
        xics_setup_cpu();
 
        ppc64_boot_msg(0x21, "XICS Done");
 }
 
-static int xics_setup_8259_cascade(void)
-{
-       struct device_node *np;
-       uint *ireg;
-
-       np = of_find_node_by_type(NULL, "interrupt-controller");
-       if (np == NULL) {
-               printk(KERN_WARNING "xics: no ISA interrupt controller\n");
-               xics_irq_8259_cascade_real = -1;
-               xics_irq_8259_cascade = -1;
-               return 0;
-       }
+/* Cpu startup, shutdown, and hotplug */
 
-       ireg = (uint *) get_property(np, "interrupts", NULL);
-       if (!ireg)
-               panic("xics_init_IRQ: can't find ISA interrupts property");
+static void xics_set_cpu_priority(unsigned char cppr)
+{
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               lpar_cppr_info(cppr);
+       else
+               direct_cppr_info(cppr);
+       iosync();
+}
 
-       xics_irq_8259_cascade_real = *ireg;
-       xics_irq_8259_cascade = irq_offset_up
-               (virt_irq_create_mapping(xics_irq_8259_cascade_real));
-       i8259_init(0, 0);
-       of_node_put(np);
+/* Have the calling processor join or leave the specified global queue */
+static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+       int status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE,
+               (1UL << interrupt_server_size) - 1 - gserver, join);
+       WARN_ON(status < 0);
+}
 
-       xics_set_irq_revmap(xics_irq_8259_cascade);
-       set_irq_chained_handler(xics_irq_8259_cascade, pSeries_8259_cascade);
+void xics_setup_cpu(void)
+{
+       xics_set_cpu_priority(0xff);
 
-       return 0;
+       xics_set_cpu_giq(default_distrib_server, 1);
 }
-arch_initcall(xics_setup_8259_cascade);
 
-
-#ifdef CONFIG_SMP
-void xics_request_IPIs(void)
+void xics_teardown_cpu(void)
 {
-       virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
+       int cpu = smp_processor_id();
 
-       /*
-        * IPIs are marked IRQF_DISABLED as they must run with irqs
-        * disabled
-        */
-       set_irq_handler(irq_offset_up(XICS_IPI), handle_percpu_irq);
+       xics_set_cpu_priority(0);
+
+       /* Clear any pending IPI request */
        if (firmware_has_feature(FW_FEATURE_LPAR))
-               request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_lpar,
-                           SA_INTERRUPT, "IPI", NULL);
+               lpar_qirr_info(cpu, 0xff);
        else
-               request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_direct,
-                           SA_INTERRUPT, "IPI", NULL);
+               direct_qirr_info(cpu, 0xff);
 }
-#endif /* CONFIG_SMP */
 
-void xics_teardown_cpu(int secondary)
+void xics_kexec_teardown_cpu(int secondary)
 {
-       struct irq_desc *desc = get_irq_desc(irq_offset_up(XICS_IPI));
-       int cpu = smp_processor_id();
+       unsigned int ipi;
+       struct irq_desc *desc;
 
-       xics_set_cpu_priority(cpu, 0);
+       xics_teardown_cpu();
 
        /*
-        * we need to EOI the IPI if we got here from kexec down IPI
+        * we need to EOI the IPI
         *
         * probably need to check all the other interrupts too
         * should we be flagging idle loop instead?
         * or creating some task to be scheduled?
         */
+
+       ipi = irq_find_mapping(xics_host, XICS_IPI);
+       if (ipi == XICS_IRQ_SPURIOUS)
+               return;
+       desc = get_irq_desc(ipi);
        if (desc->chip && desc->chip->eoi)
-               desc->chip->eoi(XICS_IPI);
+               desc->chip->eoi(ipi);
 
        /*
         * Some machines need to have at least one cpu in the GIQ,
         * so leave the master cpu in the group.
         */
        if (secondary)
-               rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-                       (1UL << interrupt_server_size) - 1 -
-                       default_distrib_server, 0);
+               xics_set_cpu_giq(default_distrib_server, 0);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -703,35 +790,38 @@ void xics_teardown_cpu(int secondary)
 /* Interrupts are disabled. */
 void xics_migrate_irqs_away(void)
 {
-       int status;
-       unsigned int irq, virq, cpu = smp_processor_id();
+       int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+       unsigned int irq, virq;
+
+       /* If we used to be the default server, move to the new "boot_cpuid" */
+       if (hw_cpu == default_server)
+               xics_update_irq_servers();
 
        /* Reject any interrupt that was queued to us... */
-       xics_set_cpu_priority(cpu, 0);
+       xics_set_cpu_priority(0);
 
-       /* remove ourselves from the global interrupt queue */
-       status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-               (1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
-       WARN_ON(status < 0);
+       /* Remove ourselves from the global interrupt queue */
+       xics_set_cpu_giq(default_distrib_server, 0);
 
        /* Allow IPIs again... */
-       xics_set_cpu_priority(cpu, DEFAULT_PRIORITY);
+       xics_set_cpu_priority(DEFAULT_PRIORITY);
 
        for_each_irq(virq) {
                struct irq_desc *desc;
                int xics_status[2];
+               int status;
                unsigned long flags;
 
                /* We cant set affinity on ISA interrupts */
-               if (virq < irq_offset_value())
+               if (virq < NUM_ISA_INTERRUPTS)
                        continue;
-
-               desc = get_irq_desc(virq);
-               irq = virt_irq_to_real(irq_offset_down(virq));
-
+               if (irq_map[virq].host != xics_host)
+                       continue;
+               irq = (unsigned int)irq_map[virq].hwirq;
                /* We need to get IPIs still. */
-               if (irq == XICS_IPI || irq == NO_IRQ)
+               if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
                        continue;
+               desc = get_irq_desc(virq);
 
                /* We only need to migrate enabled IRQS */
                if (desc == NULL || desc->chip == NULL
@@ -754,15 +844,15 @@ void xics_migrate_irqs_away(void)
                 * The irq has to be migrated only in the single cpu
                 * case.
                 */
-               if (xics_status[0] != get_hard_smp_processor_id(cpu))
+               if (xics_status[0] != hw_cpu)
                        goto unlock;
 
                printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
                       virq, cpu);
 
                /* Reset affinity to all cpus */
+               irq_desc[virq].affinity = CPU_MASK_ALL;
                desc->chip->set_affinity(virq, CPU_MASK_ALL);
-               irq_desc[irq].affinity = CPU_MASK_ALL;
 unlock:
                spin_unlock_irqrestore(&desc->lock, flags);
        }