Merge branch 'x86/mm' into core/percpu
similarity index 72%
rename from arch/x86/kernel/tlb_64.c
rename to arch/x86/mm/tlb.c
index 8cfea5d..72a6d4e 100644
@@ -1,22 +1,18 @@
 #include <linux/init.h>
 
 #include <linux/mm.h>
-#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
+#include <asm/apic.h>
+#include <asm/uv/uv.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+                       = { &init_mm, 0, };
 
 #include <mach_ipi.h>
 /*
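
The hunk above is the core of the conversion: the x86_64-only PDA fields
mmu_state and active_mm become one per-cpu structure that works the same
way on 32-bit and 64-bit.  A sketch of the definitions this relies on,
roughly as they appear in this tree's asm/tlbflush.h:

    struct tlb_state {
            struct mm_struct *active_mm;
            int state;
    };
    DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

    #define TLBSTATE_OK     1       /* mm is live on this CPU          */
    #define TLBSTATE_LAZY   2       /* kernel thread borrowing the mm  */

    /* percpu_read() compiles to a single segment-prefixed load
     * (%fs on 32-bit, %gs on 64-bit), replacing read_pda():
     */
    if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
            leave_mm(smp_processor_id());
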
 
 union smp_flush_state {
        struct {
-               cpumask_t flush_cpumask;
                struct mm_struct *flush_mm;
                unsigned long flush_va;
                spinlock_t tlbstate_lock;
+               DECLARE_BITMAP(flush_cpumask, NR_CPUS);
        };
        char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
 } ____cacheline_internodealigned_in_smp;
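
flush_cpumask changes from a cpumask_t value to a raw bitmap embedded in
the union, so the struct cpumask API can operate on it in place;
to_cpumask() is essentially a type-checked cast, not a copy.  A minimal
sketch of the idiom (cpumask_andnot() is the variant used later in this
patch):

    DECLARE_BITMAP(flush_cpumask, NR_CPUS);   /* unsigned long[BITS_TO_LONGS(NR_CPUS)] */

    struct cpumask *m = to_cpumask(flush_cpumask);        /* cast, no copy */

    cpumask_clear(m);                                     /* empty the mask */
    cpumask_set_cpu(2, m);                                /* mark CPU 2     */
    cpumask_andnot(m, m, cpumask_of(smp_processor_id())); /* drop self      */
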
@@ -62,9 +58,9 @@ static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
  */
 void leave_mm(int cpu)
 {
-       if (read_pda(mmu_state) == TLBSTATE_OK)
+       if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
-       cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+       cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
        load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
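
leave_mm() detaches a lazy-TLB CPU from the mm it was borrowing: the CPU
drops out of cpu_vm_mask (so it stops receiving flush IPIs for that mm)
and reloads CR3 with the kernel-only swapper_pg_dir.  For reference, the
state it undoes is set up at context switch, roughly like this in this
tree's switch_mm() (sketch, not part of the patch):

    /* switching this CPU to the user mm "next" */
    percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
    percpu_write(cpu_tlbstate.active_mm, next);
    cpu_set(cpu, next->cpu_vm_mask);   /* opt in to flush IPIs for next */
    load_cr3(next->pgd);
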
@@ -117,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
  * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+/*
+ * FIXME: use of asmlinkage is not consistent.  On x86_64 it's a noop,
+ * kept only for documentation purposes, yet even that usage is slightly
+ * inconsistent.  On x86_32, asmlinkage means regparm(0), but the
+ * interrupt entry code calls in with the first parameter in %eax.
+ * Maybe define intrlinkage?
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-       int cpu;
-       int sender;
+       unsigned int cpu;
+       unsigned int sender;
        union smp_flush_state *f;
 
        cpu = smp_processor_id();
@@ -131,7 +137,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
        sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
        f = &flush_state[sender];
 
-       if (!cpu_isset(cpu, f->flush_cpumask))
+       if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
                goto out;
                /*
                 * This was a BUG() but until someone can quote me the
@@ -142,8 +148,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
                 * BUG();
                 */
 
-       if (f->flush_mm == read_pda(active_mm)) {
-               if (read_pda(mmu_state) == TLBSTATE_OK) {
+       if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+               if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                        if (f->flush_va == TLB_FLUSH_ALL)
                                local_flush_tlb();
                        else
@@ -153,19 +159,17 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
        }
 out:
        ack_APIC_irq();
-       cpu_clear(cpu, f->flush_cpumask);
+       smp_mb__before_clear_bit();
+       cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
+       smp_mb__after_clear_bit();
        inc_irq_stat(irq_tlb_count);
 }
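
The explicit barrier pair is new: clearing this CPU's bit is the
completion signal the initiator spins on, so it must not become visible
before the flush work above it is done.  On x86 the bit operations are
already locked and these barriers reduce to a compiler barrier, but they
document the protocol.  In outline (initiator side is in
flush_tlb_others_ipi() below):

    responder (this handler)             initiator (flush_tlb_others_ipi)
    ------------------------             --------------------------------
    local_flush_tlb();                   send_IPI_mask(..., vector);
    smp_mb__before_clear_bit();          while (!cpumask_empty(mask))
    cpumask_clear_cpu(cpu, mask);                cpu_relax();
    smp_mb__after_clear_bit();           /* empty mask => all CPUs done */
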
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-                            unsigned long va)
+static void flush_tlb_others_ipi(const struct cpumask *cpumask,
+                                struct mm_struct *mm, unsigned long va)
 {
-       int sender;
+       unsigned int sender;
        union smp_flush_state *f;
-       cpumask_t cpumask = *cpumaskp;
-
-       if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
-               return;
 
        /* Caller has disabled preemption */
        sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
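
The sender slot scheme is worth spelling out: rather than one globally
locked flush vector, there are NUM_INVALIDATE_TLB_VECTORS (8 in this
tree) independent flush_state slots, each tied to its own interrupt
vector, so up to eight flushes can be in flight at once.  Pieced
together from the two sides above:

    /* initiator: pick a slot from the CPU id, IPI on the matching vector */
    sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
    f = &flush_state[sender];
    send_IPI_mask(to_cpumask(f->flush_cpumask),
                  INVALIDATE_TLB_VECTOR_START + sender);

    /* handler: orig_ax holds the negated vector; invert to find the slot */
    sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
    f = &flush_state[sender];
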
@@ -180,7 +184,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 
        f->flush_mm = mm;
        f->flush_va = va;
-       cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+       cpumask_andnot(to_cpumask(f->flush_cpumask),
+                      cpumask, cpumask_of(smp_processor_id()));
 
        /*
         * Make the above memory operations globally visible before
@@ -191,9 +196,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
         * We have to send the IPI only to
         * CPUs affected.
         */
-       send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+       send_IPI_mask(to_cpumask(f->flush_cpumask),
+                     INVALIDATE_TLB_VECTOR_START + sender);
 
-       while (!cpus_empty(f->flush_cpumask))
+       while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
                cpu_relax();
 
        f->flush_mm = NULL;
@@ -201,6 +207,22 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
        spin_unlock(&f->tlbstate_lock);
 }
 
+void native_flush_tlb_others(const struct cpumask *cpumask,
+                            struct mm_struct *mm, unsigned long va)
+{
+       if (is_uv_system()) {
+               unsigned int cpu;
+
+               cpu = get_cpu();
+               cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
+               if (cpumask)
+                       flush_tlb_others_ipi(cpumask, mm, va);
+               put_cpu();
+               return;
+       }
+       flush_tlb_others_ipi(cpumask, mm, va);
+}
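
Note the changed contract: the old code had the one flush path call
is_uv_system() and treat uv_flush_tlb_others()'s return as a "handled"
flag; now the UV hook lives inside native_flush_tlb_others(), and
uv_flush_tlb_others() returns the CPUs it could not flush through the
Broadcast Assist Unit, or NULL when it reached them all.  The
declaration this assumes, as found in this tree's asm/uv/uv.h:

    /* Returns the subset of cpumask still needing a conventional IPI
     * flush (NULL if the BAU covered every remote CPU); cpu is the
     * initiating CPU, pinned by the get_cpu()/put_cpu() pair above.
     */
    extern const struct cpumask *
    uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
                        unsigned long va, unsigned int cpu);
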
+
 static int __cpuinit init_smp_flush(void)
 {
        int i;
@@ -215,25 +237,18 @@ core_initcall(init_smp_flush);
 void flush_tlb_current_task(void)
 {
        struct mm_struct *mm = current->mm;
-       cpumask_t cpu_mask;
 
        preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
 
        local_flush_tlb();
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
        preempt_enable();
 }
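
cpumask_any_but() returns some CPU set in the mask other than the one
given, or a value >= nr_cpu_ids when there is none, so "does any other
CPU run this mm?" becomes a scan of the live mask.  The same test
previously needed a full NR_CPUS-bit copy on the stack (and the old
flush_tlb_others() took the mask by value, copying it again):

    /* new: scan in place, no copy */
    if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
            flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);

    /* old: copy, clear self, test for emptiness */
    cpumask_t cpu_mask = mm->cpu_vm_mask;
    cpu_clear(smp_processor_id(), cpu_mask);
    if (!cpus_empty(cpu_mask))
            flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
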
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-       cpumask_t cpu_mask;
-
        preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
 
        if (current->active_mm == mm) {
                if (current->mm)
@@ -241,8 +256,8 @@ void flush_tlb_mm(struct mm_struct *mm)
                else
                        leave_mm(smp_processor_id());
        }
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 
        preempt_enable();
 }
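
The local side of flush_tlb_mm() has three cases (the flush call itself
falls between the two hunks above; it is the same local_flush_tlb() as
in flush_tlb_current_task()).  Spelled out:

    if (current->active_mm == mm) {
            if (current->mm)                      /* live user mm: flush now */
                    local_flush_tlb();
            else                                  /* lazy kernel thread:     */
                    leave_mm(smp_processor_id()); /* detach, don't flush     */
    }
    /* else: mm is not loaded on this CPU, nothing local to do */
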
@@ -250,11 +265,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
        struct mm_struct *mm = vma->vm_mm;
-       cpumask_t cpu_mask;
 
        preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
 
        if (current->active_mm == mm) {
                if (current->mm)
@@ -263,8 +275,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
                        leave_mm(smp_processor_id());
        }
 
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, va);
+       if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 
        preempt_enable();
 }
@@ -274,7 +286,7 @@ static void do_flush_tlb_all(void *info)
        unsigned long cpu = smp_processor_id();
 
        __flush_tlb_all();
-       if (read_pda(mmu_state) == TLBSTATE_LAZY)
+       if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(cpu);
 }