nfsd41: hard page limit for DRC
[safe/jmp/linux-2.6] / arch / ia64 / kernel / iosapic.c
index cf27cfb..e131250 100644 (file)
@@ -69,7 +69,7 @@
  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
  *     platform can implement platform_irq_to_vector(irq) and
  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
- *     Please see also include/asm-ia64/hw_irq.h for those APIs.
+ *     Please see also arch/ia64/include/asm/hw_irq.h for those APIs.
  *
  * To sum up, there are three levels of mappings involved:
  *
@@ -142,7 +142,7 @@ struct iosapic_rte_info {
 static struct iosapic_intr_info {
        struct list_head rtes;          /* RTEs using this vector (empty =>
                                         * not an IOSAPIC interrupt) */
-       int             count;          /* # of RTEs that shares this vector */
+       int             count;          /* # of registered RTEs */
        u32             low32;          /* current value of low word of
                                         * Redirection table entry */
        unsigned int    dest;           /* destination CPU physical ID */
@@ -199,19 +199,6 @@ static inline int __gsi_to_irq(unsigned int gsi)
        return -1;
 }
 
-/*
- * Translate GSI number to the corresponding IA-64 interrupt vector.  If no
- * entry exists, return -1.
- */
-inline int
-gsi_to_vector (unsigned int gsi)
-{
-       int irq = __gsi_to_irq(gsi);
-       if (check_irq_used(irq) < 0)
-               return -1;
-       return irq_to_vector(irq);
-}
-
 int
 gsi_to_irq (unsigned int gsi)
 {
@@ -313,7 +300,7 @@ mask_irq (unsigned int irq)
        int rte_index;
        struct iosapic_rte_info *rte;
 
-       if (list_empty(&iosapic_intr_info[irq].rtes))
+       if (!iosapic_intr_info[irq].count)
                return;                 /* not an IOSAPIC interrupt! */
 
        /* set only the mask bit */
@@ -331,7 +318,7 @@ unmask_irq (unsigned int irq)
        int rte_index;
        struct iosapic_rte_info *rte;
 
-       if (list_empty(&iosapic_intr_info[irq].rtes))
+       if (!iosapic_intr_info[irq].count)
                return;                 /* not an IOSAPIC interrupt! */
 
        low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
@@ -343,23 +330,27 @@ unmask_irq (unsigned int irq)
 
 
 static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
        u32 high32, low32;
-       int dest, rte_index;
+       int cpu, dest, rte_index;
        int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
        struct iosapic_rte_info *rte;
        struct iosapic *iosapic;
 
        irq &= (~IA64_IRQ_REDIRECTED);
 
-       if (cpus_empty(mask))
+       cpu = cpumask_first_and(cpu_online_mask, mask);
+       if (cpu >= nr_cpu_ids)
+               return;
+
+       if (irq_prepare_move(irq, cpu))
                return;
 
-       dest = cpu_physical_id(first_cpu(mask));
+       dest = cpu_physical_id(cpu);
 
-       if (list_empty(&iosapic_intr_info[irq].rtes))
+       if (!iosapic_intr_info[irq].count)
                return;                 /* not an IOSAPIC interrupt */
 
        set_irq_affinity_info(irq, dest, redir);
@@ -374,6 +365,8 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
        else
                /* change delivery mode to fixed */
                low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
+       low32 &= IOSAPIC_VECTOR_MASK;
+       low32 |= irq_to_vector(irq);
 
        iosapic_intr_info[irq].low32 = low32;
        iosapic_intr_info[irq].dest = dest;
@@ -402,10 +395,21 @@ iosapic_end_level_irq (unsigned int irq)
 {
        ia64_vector vec = irq_to_vector(irq);
        struct iosapic_rte_info *rte;
+       int do_unmask_irq = 0;
+
+       irq_complete_move(irq);
+       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+               do_unmask_irq = 1;
+               mask_irq(irq);
+       }
 
-       move_native_irq(irq);
        list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
                iosapic_eoi(rte->iosapic->addr, vec);
+
+       if (unlikely(do_unmask_irq)) {
+               move_masked_irq(irq);
+               unmask_irq(irq);
+       }
 }
 
 #define iosapic_shutdown_level_irq     mask_irq
@@ -413,7 +417,7 @@ iosapic_end_level_irq (unsigned int irq)
 #define iosapic_disable_level_irq      mask_irq
 #define iosapic_ack_level_irq          nop
 
-struct irq_chip irq_type_iosapic_level = {
+static struct irq_chip irq_type_iosapic_level = {
        .name =         "IO-SAPIC-level",
        .startup =      iosapic_startup_level_irq,
        .shutdown =     iosapic_shutdown_level_irq,
@@ -447,6 +451,7 @@ iosapic_ack_edge_irq (unsigned int irq)
 {
        irq_desc_t *idesc = irq_desc + irq;
 
+       irq_complete_move(irq);
        move_native_irq(irq);
        /*
         * Once we have recorded IRQ_PENDING already, we can mask the
@@ -462,7 +467,7 @@ iosapic_ack_edge_irq (unsigned int irq)
 #define iosapic_disable_edge_irq       nop
 #define iosapic_end_edge_irq           nop
 
-struct irq_chip irq_type_iosapic_edge = {
+static struct irq_chip irq_type_iosapic_edge = {
        .name =         "IO-SAPIC-edge",
        .startup =      iosapic_startup_edge_irq,
        .shutdown =     iosapic_disable_edge_irq,
@@ -475,7 +480,7 @@ struct irq_chip irq_type_iosapic_edge = {
        .set_affinity = iosapic_set_affinity
 };
 
-unsigned int
+static unsigned int
 iosapic_version (char __iomem *addr)
 {
        /*
@@ -502,7 +507,7 @@ static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
        if (trigger == IOSAPIC_EDGE)
                return -EINVAL;
 
-       for (i = 0; i <= NR_IRQS; i++) {
+       for (i = 0; i < NR_IRQS; i++) {
                info = &iosapic_intr_info[i];
                if (info->trigger == trigger && info->polarity == pol &&
                    (info->dmode == IOSAPIC_FIXED ||
@@ -526,10 +531,10 @@ iosapic_reassign_vector (int irq)
 {
        int new_irq;
 
-       if (!list_empty(&iosapic_intr_info[irq].rtes)) {
+       if (iosapic_intr_info[irq].count) {
                new_irq = create_irq();
                if (new_irq < 0)
-                       panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+                       panic("%s: out of interrupt vectors!\n", __func__);
                printk(KERN_INFO "Reassigning vector %d to %d\n",
                       irq_to_vector(irq), irq_to_vector(new_irq));
                memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
@@ -544,7 +549,7 @@ iosapic_reassign_vector (int irq)
        }
 }
 
-static struct iosapic_rte_info *iosapic_alloc_rte (void)
+static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
 {
        int i;
        struct iosapic_rte_info *rte;
@@ -553,8 +558,6 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
        if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
                rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
                                    NR_PREALLOCATE_RTE_ENTRIES);
-               if (!rte)
-                       return NULL;
                for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
                        list_add(&rte->rte_list, &free_rte_list);
        }
@@ -577,17 +580,18 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
        return rte;
 }
 
-static void iosapic_free_rte (struct iosapic_rte_info *rte)
+static inline int irq_is_shared (int irq)
 {
-       if (rte->flags & RTE_PREALLOCATED)
-               list_add_tail(&rte->rte_list, &free_rte_list);
-       else
-               kfree(rte);
+       return (iosapic_intr_info[irq].count > 1);
 }
 
-static inline int irq_is_shared (int irq)
+struct irq_chip*
+ia64_native_iosapic_get_irq_chip(unsigned long trigger)
 {
-       return (iosapic_intr_info[irq].count > 1);
+       if (trigger == IOSAPIC_EDGE)
+               return &irq_type_iosapic_edge;
+       else
+               return &irq_type_iosapic_level;
 }
 
 static int
@@ -602,7 +606,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
        index = find_iosapic(gsi);
        if (index < 0) {
                printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-                      __FUNCTION__, gsi);
+                      __func__, gsi);
                return -ENODEV;
        }
 
@@ -611,7 +615,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
                rte = iosapic_alloc_rte();
                if (!rte) {
                        printk(KERN_WARNING "%s: cannot allocate memory\n",
-                              __FUNCTION__);
+                              __func__);
                        return -ENOMEM;
                }
 
@@ -628,7 +632,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
                    (info->trigger != trigger || info->polarity != polarity)){
                        printk (KERN_WARNING
                                "%s: cannot override the interrupt\n",
-                               __FUNCTION__);
+                               __func__);
                        return -EINVAL;
                }
                rte->refcnt++;
@@ -640,17 +644,14 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
        iosapic_intr_info[irq].dmode    = delivery;
        iosapic_intr_info[irq].trigger  = trigger;
 
-       if (trigger == IOSAPIC_EDGE)
-               irq_type = &irq_type_iosapic_edge;
-       else
-               irq_type = &irq_type_iosapic_level;
+       irq_type = iosapic_get_irq_chip(trigger);
 
        idesc = irq_desc + irq;
-       if (idesc->chip != irq_type) {
+       if (irq_type != NULL && idesc->chip != irq_type) {
                if (idesc->chip != &no_irq_type)
                        printk(KERN_WARNING
                               "%s: changing vector %d from %s to %s\n",
-                              __FUNCTION__, irq_to_vector(irq),
+                              __func__, irq_to_vector(irq),
                               idesc->chip->name, irq_type->name);
                idesc->chip = irq_type;
        }
@@ -663,12 +664,13 @@ get_target_cpu (unsigned int gsi, int irq)
 #ifdef CONFIG_SMP
        static int cpu = -1;
        extern int cpe_vector;
+       cpumask_t domain = irq_to_domain(irq);
 
        /*
         * In case of vector shared by multiple RTEs, all RTEs that
         * share the vector need to use the same destination CPU.
         */
-       if (!list_empty(&iosapic_intr_info[irq].rtes))
+       if (iosapic_intr_info[irq].count)
                return iosapic_intr_info[irq].dest;
 
        /*
@@ -693,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq)
 #ifdef CONFIG_NUMA
        {
                int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
-               cpumask_t cpu_mask;
+               const struct cpumask *cpu_mask;
 
                iosapic_index = find_iosapic(gsi);
                if (iosapic_index < 0 ||
                    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
                        goto skip_numa_setup;
 
-               cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-
-               for_each_cpu_mask(numa_cpu, cpu_mask) {
-                       if (!cpu_online(numa_cpu))
-                               cpu_clear(numa_cpu, cpu_mask);
+               cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+               num_cpus = 0;
+               for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+                       if (cpu_online(numa_cpu))
+                               num_cpus++;
                }
 
-               num_cpus = cpus_weight(cpu_mask);
-
                if (!num_cpus)
                        goto skip_numa_setup;
 
                /* Use irq assignment to distribute across cpus in node */
                cpu_index = irq % num_cpus;
 
-               for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
-                       numa_cpu = next_cpu(numa_cpu, cpu_mask);
+               for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+                       if (cpu_online(numa_cpu) && i++ >= cpu_index)
+                               break;
 
-               if (numa_cpu != NR_CPUS)
+               if (numa_cpu < nr_cpu_ids)
                        return cpu_physical_id(numa_cpu);
        }
 skip_numa_setup:
@@ -729,9 +730,9 @@ skip_numa_setup:
         * case of NUMA.)
         */
        do {
-               if (++cpu >= NR_CPUS)
+               if (++cpu >= nr_cpu_ids)
                        cpu = 0;
-       } while (!cpu_online(cpu));
+       } while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
 
        return cpu_physical_id(cpu);
 #else  /* CONFIG_SMP */
@@ -739,6 +740,15 @@ skip_numa_setup:
 #endif
 }
 
+static inline unsigned char choose_dmode(void) /* delivery mode passed to register_intr() */
+{
+#ifdef CONFIG_SMP
+       if (smp_int_redirect & SMP_IRQ_REDIRECTION)     /* redirection flag is set */
+               return IOSAPIC_LOWEST_PRIORITY;
+#endif /* CONFIG_SMP */
+       return IOSAPIC_FIXED;   /* !CONFIG_SMP, or redirection flag clear */
+}
+
 /*
  * ACPI can describe IOSAPIC interrupts via static tables and namespace
  * methods.  This provides an interface to register those interrupts and
@@ -753,6 +763,7 @@ iosapic_register_intr (unsigned int gsi,
        unsigned long flags;
        struct iosapic_rte_info *rte;
        u32 low32;
+       unsigned char dmode;
 
        /*
         * If this GSI has already been registered (i.e., it's a
@@ -782,11 +793,12 @@ iosapic_register_intr (unsigned int gsi,
 
        spin_lock(&irq_desc[irq].lock);
        dest = get_target_cpu(gsi, irq);
-       err = register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY,
-                           polarity, trigger);
+       dmode = choose_dmode();
+       err = register_intr(gsi, irq, dmode, polarity, trigger);
        if (err < 0) {
+               spin_unlock(&irq_desc[irq].lock);
                irq = err;
-               goto unlock_all;
+               goto unlock_iosapic_lock;
        }
 
        /*
@@ -802,7 +814,7 @@ iosapic_register_intr (unsigned int gsi,
               gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
               (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
               cpu_logical_id(dest), dest, irq_to_vector(irq));
- unlock_all:
+
        spin_unlock(&irq_desc[irq].lock);
  unlock_iosapic_lock:
        spin_unlock_irqrestore(&iosapic_lock, flags);
@@ -900,7 +912,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
        switch (int_type) {
              case ACPI_INTERRUPT_PMI:
                irq = vector = iosapic_vector;
-               bind_irq_vector(irq, vector);
+               bind_irq_vector(irq, vector, CPU_MASK_ALL);
                /*
                 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
                 * we need to make sure the vector is available
@@ -911,18 +923,18 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
              case ACPI_INTERRUPT_INIT:
                irq = create_irq();
                if (irq < 0)
-                       panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+                       panic("%s: out of interrupt vectors!\n", __func__);
                vector = irq_to_vector(irq);
                delivery = IOSAPIC_INIT;
                break;
              case ACPI_INTERRUPT_CPEI:
                irq = vector = IA64_CPE_VECTOR;
-               BUG_ON(bind_irq_vector(irq, vector));
-               delivery = IOSAPIC_LOWEST_PRIORITY;
+               BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+               delivery = IOSAPIC_FIXED;
                mask = 1;
                break;
              default:
-               printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+               printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
                       int_type);
                return -1;
        }
@@ -951,10 +963,12 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
 {
        int vector, irq;
        unsigned int dest = cpu_physical_id(smp_processor_id());
+       unsigned char dmode;
 
        irq = vector = isa_irq_to_vector(isa_irq);
-       BUG_ON(bind_irq_vector(irq, vector));
-       register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
+       BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+       dmode = choose_dmode();
+       register_intr(gsi, irq, dmode, polarity, trigger);
 
        DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
            isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
@@ -965,6 +979,22 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
 }
 
 void __init
+ia64_native_iosapic_pcat_compat_init(void)
+{
+       if (pcat_compat) {
+               /*
+                * Disable the compatibility mode (8259 style) interrupts by
+                * writing 0xff to ports 0xA1 and 0x21; needs IN/OUT support.
+                */
+               printk(KERN_INFO
+                      "%s: Disabling PC-AT compatible 8259 interrupts\n",
+                      __func__);
+               outb(0xff, 0xA1);       /* presumably the second 8259's mask -- confirm */
+               outb(0xff, 0x21);       /* presumably the first 8259's mask -- confirm */
+       }
+}
+
+void __init
 iosapic_system_init (int system_pcat_compat)
 {
        int irq;
@@ -978,17 +1008,8 @@ iosapic_system_init (int system_pcat_compat)
        }
 
        pcat_compat = system_pcat_compat;
-       if (pcat_compat) {
-               /*
-                * Disable the compatibility mode interrupts (8259 style),
-                * needs IN/OUT support enabled.
-                */
-               printk(KERN_INFO
-                      "%s: Disabling PC-AT compatible 8259 interrupts\n",
-                      __FUNCTION__);
-               outb(0xff, 0xA1);
-               outb(0xff, 0x21);
-       }
+       if (pcat_compat)
+               iosapic_pcat_compat_init();
 }
 
 static inline int
@@ -1000,7 +1021,7 @@ iosapic_alloc (void)
                if (!iosapic_lists[index].addr)
                        return index;
 
-       printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
+       printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
        return -1;
 }
 
@@ -1098,14 +1119,14 @@ iosapic_remove (unsigned int gsi_base)
        index = find_iosapic(gsi_base);
        if (index < 0) {
                printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
-                      __FUNCTION__, gsi_base);
+                      __func__, gsi_base);
                goto out;
        }
 
        if (iosapic_lists[index].rtes_inuse) {
                err = -EBUSY;
                printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
-                      __FUNCTION__, gsi_base);
+                      __func__, gsi_base);
                goto out;
        }
 
@@ -1126,7 +1147,7 @@ map_iosapic_to_node(unsigned int gsi_base, int node)
        index = find_iosapic(gsi_base);
        if (index < 0) {
                printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-                      __FUNCTION__, gsi_base);
+                      __func__, gsi_base);
                return;
        }
        iosapic_lists[index].node = node;