diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index d7ec731..0f2b0b3 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -63,18 +63,37 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
        .completed = -300,
        .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
-       .cpumask = CPU_MASK_NONE,
+       .cpumask = CPU_BITS_NONE,
 };
+
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
-       .cpumask = CPU_MASK_NONE,
+       .cpumask = CPU_BITS_NONE,
 };
 
-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
+static DEFINE_PER_CPU(struct rcu_data, rcu_data);
+static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+/*
+ * Increment the quiescent state counter.
+ * The counter is somewhat degenerate: we do not need to know
+ * how many quiescent states have passed, just whether there has
+ * been at least one since the start of the grace period, so a
+ * simple flag suffices.
+ */
+void rcu_qsctr_inc(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+       rdp->passed_quiesc = 1;
+}
+
+void rcu_bh_qsctr_inc(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+       rdp->passed_quiesc = 1;
+}
 
 static int blimit = 10;
 static int qhimark = 10000;
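
The two helpers above are intended to be invoked from places where the CPU is known to be outside any RCU read-side critical section, for example the scheduler's context-switch path and the timer-tick handling in rcu_check_callbacks() further down in this file. A minimal, illustrative call site (the function name is hypothetical) might look like:

    static void example_note_quiescent_state(void)
    {
            int cpu = smp_processor_id();   /* caller has preemption disabled */

            /* This CPU is not inside rcu_read_lock() here ... */
            rcu_qsctr_inc(cpu);
            /* ... nor inside rcu_read_lock_bh(). */
            rcu_bh_qsctr_inc(cpu);
    }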
@@ -85,18 +104,20 @@ static void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
 {
        int cpu;
-       cpumask_t cpumask;
+       unsigned long flags;
+
        set_need_resched();
+       spin_lock_irqsave(&rcp->lock, flags);
        if (unlikely(!rcp->signaled)) {
                rcp->signaled = 1;
                /*
                 * Don't send IPI to itself. With irqs disabled,
                 * rdp->cpu is the current cpu.
                 *
-                * cpu_online_map is updated by the _cpu_down()
+                * cpu_online_mask is updated by the _cpu_down()
                 * using __stop_machine(). Since we're in an irqs-disabled
                 * section, __stop_machine() is not executing, hence
-                * the cpu_online_map is stable.
+                * the cpu_online_mask is stable.
                 *
                 * However,  a cpu might have been offlined _just_ before
                 * we disabled irqs while entering here.
@@ -104,14 +125,16 @@ static void force_quiescent_state(struct rcu_data *rdp,
                 * notification, leading to the offlined cpu's bit
                 * being set in the rcp->cpumask.
                 *
-                * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+                * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
                 * sending smp_send_reschedule() to an offlined CPU.
                 */
-               cpus_and(cpumask, rcp->cpumask, cpu_online_map);
-               cpu_clear(rdp->cpu, cpumask);
-               for_each_cpu_mask_nr(cpu, cpumask)
-                       smp_send_reschedule(cpu);
+               for_each_cpu_and(cpu,
+                                 to_cpumask(rcp->cpumask), cpu_online_mask) {
+                       if (cpu != rdp->cpu)
+                               smp_send_reschedule(cpu);
+               }
        }
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 #else
 static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -125,7 +148,9 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
                struct rcu_data *rdp)
 {
        long batch;
-       smp_mb(); /* reads the most recently updated value of rcu->cur. */
+
+       head->next = NULL;
+       smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
 
        /*
         * Determine the batch number of this callback.
@@ -158,6 +183,88 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
        }
 }
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       int cpu;
+       long delta;
+       unsigned long flags;
+
+       /* Only let one CPU complain about others per time interval. */
+
+       spin_lock_irqsave(&rcp->lock, flags);
+       delta = jiffies - rcp->jiffies_stall;
+       if (delta < 2 || rcp->cur != rcp->completed) {
+               spin_unlock_irqrestore(&rcp->lock, flags);
+               return;
+       }
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+
+       /* OK, time to rat on our buddy... */
+
+       printk(KERN_ERR "INFO: RCU detected CPU stalls:");
+       for_each_possible_cpu(cpu) {
+               if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
+                       printk(" %d", cpu);
+       }
+       printk(" (detected by %d, t=%ld jiffies)\n",
+              smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long flags;
+
+       printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+                       smp_processor_id(), jiffies,
+                       jiffies - rcp->gp_start);
+       dump_stack();
+       spin_lock_irqsave(&rcp->lock, flags);
+       if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+               rcp->jiffies_stall =
+                       jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+       set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       long delta;
+
+       delta = jiffies - rcp->jiffies_stall;
+       if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
+               delta >= 0) {
+
+               /* We haven't checked in, so go dump stack. */
+               print_cpu_stall(rcp);
+
+       } else if (rcp->cur != rcp->completed && delta >= 2) {
+
+               /* They had a few extra jiffies to dump stack, so complain. */
+               print_other_cpu_stall(rcp);
+       }
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
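
To make the detector above concrete: it complains when a CPU that has not yet reported a quiescent state keeps the current grace period from ending past rcp->jiffies_stall. A deliberately broken sketch of the kind of code that would provoke the messages (illustrative only, never do this):

    static void example_provoke_rcu_stall(void)
    {
            /* Spin with preemption off: this CPU never schedules and never
             * reports a quiescent state, so the INFO messages above fire. */
            preempt_disable();
            for (;;)
                    cpu_relax();
    }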
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -175,7 +282,6 @@ void call_rcu(struct rcu_head *head,
        unsigned long flags;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
        __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
        local_irq_restore(flags);
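
For readers less familiar with the interface being touched here, a typical call_rcu() user looks roughly like the sketch below; struct foo, foo_free_rcu() and foo_release() are hypothetical names, not part of this file:

    struct foo {
            int data;
            struct rcu_head rcu;
    };

    static void foo_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct foo, rcu));
    }

    static void foo_release(struct foo *fp)
    {
            /* Unlink fp first so no new reader can find it, then: */
            call_rcu(&fp->rcu, foo_free_rcu);
    }

Note that this patch moves the head->next = NULL initialization from call_rcu()/call_rcu_bh() into __call_rcu(), so callers such as the sketch above are unaffected.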
@@ -204,7 +310,6 @@ void call_rcu_bh(struct rcu_head *head,
        unsigned long flags;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
        __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
        local_irq_restore(flags);
@@ -243,6 +348,7 @@ static inline void raise_rcu_softirq(void)
  */
 static void rcu_do_batch(struct rcu_data *rdp)
 {
+       unsigned long flags;
        struct rcu_head *next, *list;
        int count = 0;
 
@@ -257,9 +363,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
        }
        rdp->donelist = list;
 
-       local_irq_disable();
+       local_irq_save(flags);
        rdp->qlen -= count;
-       local_irq_enable();
+       local_irq_restore(flags);
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;
 
@@ -288,80 +394,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   period (if necessary).
  */
 
-#ifdef CONFIG_DEBUG_RCU_STALL
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-       rcp->gp_check = get_seconds() + 3;
-}
-static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-       int cpu;
-       long delta;
-
-       /* Only let one CPU complain about others per time interval. */
-
-       spin_lock(&rcp->lock);
-       delta = get_seconds() - rcp->gp_check;
-       if (delta < 2L ||
-           cpus_empty(rcp->cpumask)) {
-               spin_unlock(&rcp->lock);
-               return;
-       rcp->gp_check = get_seconds() + 30;
-       }
-       spin_unlock(&rcp->lock);
-
-       /* OK, time to rat on our buddy... */
-
-       printk(KERN_ERR "RCU detected CPU stalls:");
-       for_each_cpu_mask(cpu, rcp->cpumask)
-               printk(" %d", cpu);
-       printk(" (detected by %d, t=%lu/%lu)\n",
-              smp_processor_id(), get_seconds(), rcp->gp_check);
-}
-static void print_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-       printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
-                       smp_processor_id(), get_seconds(), rcp->gp_check);
-       dump_stack();
-       spin_lock(&rcp->lock);
-       if ((long)(get_seconds() - rcp->gp_check) >= 0L)
-               rcp->gp_check = get_seconds() + 30;
-       spin_unlock(&rcp->lock);
-}
-static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
-                                  struct rcu_data *rdp)
-{
-       long delta;
-
-       delta = get_seconds() - rcp->gp_check;
-       if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
-
-               /* We haven't checked in, so go dump stack. */
-
-               print_cpu_stall(rcp);
-
-       } else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
-
-               /* They had two seconds to dump stack, so complain. */
-
-               print_other_cpu_stall(rcp);
-
-       }
-}
-
-#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-}
-static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
-                                  struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
-
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -372,7 +404,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
        if (rcp->cur != rcp->pending &&
                        rcp->completed == rcp->cur) {
                rcp->cur++;
-               record_gp_check_time(rcp);
+               record_gp_stall_check_time(rcp);
 
                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -381,7 +413,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
                 * unnecessarily.
                 */
                smp_mb();
-               cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+               cpumask_andnot(to_cpumask(rcp->cpumask),
+                              cpu_online_mask, nohz_cpu_mask);
 
                rcp->signaled = 0;
        }
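
The grace-period bookkeeping in rcu_start_batch() is a three-counter scheme: rcp->cur is the batch currently running, rcp->completed the last one finished, and rcp->pending the most recent batch some CPU has requested; cur is advanced only when cur == completed and pending is ahead of it. The batch comparisons used in the hunks below rely on signed-difference helpers, which live in include/linux/rcuclassic.h rather than in this patch and look roughly like:

    /* Is batch a before/after batch b?  Signed subtraction copes with wrap. */
    static inline int rcu_batch_before(long a, long b)
    {
            return (a - b) < 0;
    }

    static inline int rcu_batch_after(long a, long b)
    {
            return (a - b) > 0;
    }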
@@ -394,8 +427,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  */
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-       cpu_clear(cpu, rcp->cpumask);
-       if (cpus_empty(rcp->cpumask)) {
+       cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
+       if (cpumask_empty(to_cpumask(rcp->cpumask))) {
                /* batch completed ! */
                rcp->completed = rcp->cur;
                rcu_start_batch(rcp);
@@ -410,6 +443,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
+       unsigned long flags;
+
        if (rdp->quiescbatch != rcp->cur) {
                /* start new grace period: */
                rdp->qs_pending = 1;
@@ -433,7 +468,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                return;
        rdp->qs_pending = 0;
 
-       spin_lock(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        /*
         * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
         * during cpu startup. Ignore the quiescent state.
@@ -441,7 +476,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
        if (likely(rdp->quiescbatch == rcp->cur))
                cpu_quiet(rdp->cpu, rcp);
 
-       spin_unlock(&rcp->lock);
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 
@@ -454,33 +489,36 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                                struct rcu_head **tail, long batch)
 {
+       unsigned long flags;
+
        if (list) {
-               local_irq_disable();
+               local_irq_save(flags);
                this_rdp->batch = batch;
                *this_rdp->nxttail[2] = list;
                this_rdp->nxttail[2] = tail;
-               local_irq_enable();
+               local_irq_restore(flags);
        }
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
-       /* if the cpu going offline owns the grace period
+       unsigned long flags;
+
+       /*
+        * if the cpu going offline owns the grace period
         * we can block indefinitely waiting for it, so flush
         * it here
         */
-       spin_lock_bh(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        if (rcp->cur != rcp->completed)
                cpu_quiet(rdp->cpu, rcp);
-       spin_unlock_bh(&rcp->lock);
-       /* spin_lock implies smp_mb() */
        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
        rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
+       spin_unlock(&rcp->lock);
 
-       local_irq_disable();
        this_rdp->qlen += rdp->qlen;
-       local_irq_enable();
+       local_irq_restore(flags);
 }
 
 static void rcu_offline_cpu(int cpu)
@@ -510,16 +548,20 @@ static void rcu_offline_cpu(int cpu)
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
+       unsigned long flags;
+       long completed_snap;
+
        if (rdp->nxtlist) {
-               local_irq_disable();
+               local_irq_save(flags);
+               completed_snap = ACCESS_ONCE(rcp->completed);
 
                /*
                 * move the other grace-period-completed entries to
                 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
                 */
-               if (!rcu_batch_before(rcp->completed, rdp->batch))
+               if (!rcu_batch_before(completed_snap, rdp->batch))
                        rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
-               else if (!rcu_batch_before(rcp->completed, rdp->batch - 1))
+               else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
                        rdp->nxttail[0] = rdp->nxttail[1];
 
                /*
@@ -540,16 +582,18 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                        rdp->nxttail[0] = &rdp->nxtlist;
                }
 
-               local_irq_enable();
+               local_irq_restore(flags);
 
                if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                       unsigned long flags2;
+
                        /* and start it/schedule start if it's a new batch */
-                       spin_lock(&rcp->lock);
+                       spin_lock_irqsave(&rcp->lock, flags2);
                        if (rcu_batch_after(rdp->batch, rcp->pending)) {
                                rcp->pending = rdp->batch;
                                rcu_start_batch(rcp);
                        }
-                       spin_unlock(&rcp->lock);
+                       spin_unlock_irqrestore(&rcp->lock, flags2);
                }
        }
 
@@ -560,23 +604,41 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+       /*
+        * Memory references from any prior RCU read-side critical sections
+        * executed by the interrupted code must be seen before any RCU
+        * grace-period manipulations below.
+        */
+
+       smp_mb(); /* See above block comment. */
+
        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+
+       /*
+        * Memory references from any later RCU read-side critical sections
+        * executed by the interrupted code must be seen after any RCU
+        * grace-period manipulations above.
+        */
+
+       smp_mb(); /* See above block comment. */
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
        /* Check for CPU stalls, if enabled. */
-       check_cpu_stall(rcp, rdp);
+       check_cpu_stall(rcp);
 
        if (rdp->nxtlist) {
+               long completed_snap = ACCESS_ONCE(rcp->completed);
+
                /*
                 * This cpu has pending rcu entries and the grace period
                 * for them has completed.
                 */
-               if (!rcu_batch_before(rcp->completed, rdp->batch))
+               if (!rcu_batch_before(completed_snap, rdp->batch))
                        return 1;
-               if (!rcu_batch_before(rcp->completed, rdp->batch - 1) &&
+               if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
                                rdp->nxttail[0] != rdp->nxttail[1])
                        return 1;
                if (rdp->nxttail[0] != &rdp->nxtlist)
@@ -627,11 +689,17 @@ int rcu_needs_cpu(int cpu)
        return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
 }
 
+/*
+ * Top-level function driving RCU grace-period detection, normally
+ * invoked from the scheduler-clock interrupt.  This function simply
+ * increments counters that are read only from softirq by this same
+ * CPU, so there are no memory barriers required.
+ */
 void rcu_check_callbacks(int cpu, int user)
 {
        if (user ||
-           (idle_cpu(cpu) && !in_softirq() &&
-                               hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+           (idle_cpu(cpu) && rcu_scheduler_active &&
+            !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 
                /*
                 * Get here if this CPU took its interrupt from user
@@ -667,9 +735,12 @@ void rcu_check_callbacks(int cpu, int user)
        raise_rcu_softirq();
 }
 
-static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
+static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                struct rcu_data *rdp)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&rcp->lock, flags);
        memset(rdp, 0, sizeof(*rdp));
        rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
        rdp->donetail = &rdp->donelist;
@@ -677,6 +748,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
        rdp->qs_pending = 0;
        rdp->cpu = cpu;
        rdp->blimit = blimit;
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 static void __cpuinit rcu_online_cpu(int cpu)
@@ -721,6 +793,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
  */
 void __init __rcu_init(void)
 {
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+       printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
        rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
        /* Register notifier for non-boot CPUs */
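
Finally, the stall-detection code added above is compiled in only when CONFIG_RCU_CPU_STALL_DETECTOR is set, so a classic-RCU kernel built to exercise it needs roughly the following configuration (illustrative .config fragment):

    CONFIG_CLASSIC_RCU=y
    CONFIG_RCU_CPU_STALL_DETECTOR=y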