diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index d7ec731..0f2b0b3 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -63,18 +63,37 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
        .completed = -300,
        .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
-       .cpumask = CPU_MASK_NONE,
+       .cpumask = CPU_BITS_NONE,
 };
+
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
-       .cpumask = CPU_MASK_NONE,
+       .cpumask = CPU_BITS_NONE,
 };
 
-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
+static DEFINE_PER_CPU(struct rcu_data, rcu_data);
+static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+/*
+ * Increment the quiescent state counter.
+ * The counter is somewhat degenerate: we do not need to know
+ * how many quiescent states have passed, just whether there has
+ * been at least one since the start of the grace period, so a
+ * simple flag suffices.
+ */
+void rcu_qsctr_inc(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+       rdp->passed_quiesc = 1;
+}
+
+void rcu_bh_qsctr_inc(int cpu)
+{
+       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+       rdp->passed_quiesc = 1;
+}
 
 static int blimit = 10;
 static int qhimark = 10000;
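
The two helpers above are intended to be invoked from places where the CPU is known to be outside any RCU read-side critical section, for example the scheduler's context-switch path and the timer-tick handling in rcu_check_callbacks() further down in this file. A minimal, illustrative call site (the function name is hypothetical) might look like:

    static void example_note_quiescent_state(void)
    {
            int cpu = smp_processor_id();   /* caller has preemption disabled */

            /* This CPU is not inside rcu_read_lock() here ... */
            rcu_qsctr_inc(cpu);
            /* ... nor inside rcu_read_lock_bh(). */
            rcu_bh_qsctr_inc(cpu);
    }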
@@ -85,18 +104,20 @@ static void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
 {
        int cpu;
-       cpumask_t cpumask;
+       unsigned long flags;
+
        set_need_resched();
+       spin_lock_irqsave(&rcp->lock, flags);
        if (unlikely(!rcp->signaled)) {
                rcp->signaled = 1;
                /*
                 * Don't send IPI to itself. With irqs disabled,
                 * rdp->cpu is the current cpu.
                 *
-                * cpu_online_map is updated by the _cpu_down()
+                * cpu_online_mask is updated by the _cpu_down()
                 * using __stop_machine(). Since we're in an irqs-disabled
                 * section, __stop_machine() is not executing, hence
-                * the cpu_online_map is stable.
+                * the cpu_online_mask is stable.
                 *
                 * However,  a cpu might have been offlined _just_ before
                 * we disabled irqs while entering here.
@@ -104,14 +125,16 @@ static void force_quiescent_state(struct rcu_data *rdp,
                 * notification, leading to the offlined cpu's bit
                 * being set in the rcp->cpumask.
                 *
-                * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+                * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
                 * sending smp_send_reschedule() to an offlined CPU.
                 */
-               cpus_and(cpumask, rcp->cpumask, cpu_online_map);
-               cpu_clear(rdp->cpu, cpumask);
-               for_each_cpu_mask_nr(cpu, cpumask)
-                       smp_send_reschedule(cpu);
+               for_each_cpu_and(cpu,
+                                 to_cpumask(rcp->cpumask), cpu_online_mask) {
+                       if (cpu != rdp->cpu)
+                               smp_send_reschedule(cpu);
+               }
        }
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 #else
 static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -125,7 +148,9 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
                struct rcu_data *rdp)
 {
        long batch;
-       smp_mb(); /* reads the most recently updated value of rcu->cur. */
+
+       head->next = NULL;
+       smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
 
        /*
         * Determine the batch number of this callback.
@@ -158,6 +183,88 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
        }
 }
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       int cpu;
+       long delta;
+       unsigned long flags;
+
+       /* Only let one CPU complain about others per time interval. */
+
+       spin_lock_irqsave(&rcp->lock, flags);
+       delta = jiffies - rcp->jiffies_stall;
+       if (delta < 2 || rcp->cur != rcp->completed) {
+               spin_unlock_irqrestore(&rcp->lock, flags);
+               return;
+       }
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+
+       /* OK, time to rat on our buddy... */
+
+       printk(KERN_ERR "INFO: RCU detected CPU stalls:");
+       for_each_possible_cpu(cpu) {
+               if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
+                       printk(" %d", cpu);
+       }
+       printk(" (detected by %d, t=%ld jiffies)\n",
+              smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long flags;
+
+       printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+                       smp_processor_id(), jiffies,
+                       jiffies - rcp->gp_start);
+       dump_stack();
+       spin_lock_irqsave(&rcp->lock, flags);
+       if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+               rcp->jiffies_stall =
+                       jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+       set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       long delta;
+
+       delta = jiffies - rcp->jiffies_stall;
+       if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
+               delta >= 0) {
+
+               /* We haven't checked in, so go dump stack. */
+               print_cpu_stall(rcp);
+
+       } else if (rcp->cur != rcp->completed && delta >= 2) {
+
+               /* They had a few extra jiffies to dump stack, so complain. */
+               print_other_cpu_stall(rcp);
+       }
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
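
To make the detector above concrete: it complains when a CPU that has not yet reported a quiescent state keeps the current grace period from ending past rcp->jiffies_stall. A deliberately broken sketch of the kind of code that would provoke the messages (illustrative only, never do this):

    static void example_provoke_rcu_stall(void)
    {
            /* Spin with preemption off: this CPU never schedules and never
             * reports a quiescent state, so the INFO messages above fire. */
            preempt_disable();
            for (;;)
                    cpu_relax();
    }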
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -175,7 +282,6 @@ void call_rcu(struct rcu_head *head,
        unsigned long flags;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
        __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
        local_irq_restore(flags);
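
For readers less familiar with the interface being touched here, a typical call_rcu() user looks roughly like the sketch below; struct foo, foo_free_rcu() and foo_release() are hypothetical names, not part of this file:

    struct foo {
            int data;
            struct rcu_head rcu;
    };

    static void foo_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct foo, rcu));
    }

    static void foo_release(struct foo *fp)
    {
            /* Unlink fp first so no new reader can find it, then: */
            call_rcu(&fp->rcu, foo_free_rcu);
    }

Note that this patch moves the head->next = NULL initialization from call_rcu()/call_rcu_bh() into __call_rcu(), so callers such as the sketch above are unaffected.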
@@ -204,7 +310,6 @@ void call_rcu_bh(struct rcu_head *head,
        unsigned long flags;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
        __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
        local_irq_restore(flags);
@@ -243,6 +348,7 @@ static inline void raise_rcu_softirq(void)
  */
 static void rcu_do_batch(struct rcu_data *rdp)
 {
+       unsigned long flags;
        struct rcu_head *next, *list;
        int count = 0;
 
@@ -257,9 +363,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
        }
        rdp->donelist = list;
 
-       local_irq_disable();
+       local_irq_save(flags);
        rdp->qlen -= count;
-       local_irq_enable();
+       local_irq_restore(flags);
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;
 
@@ -288,80 +394,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   period (if necessary).
  */
 
-#ifdef CONFIG_DEBUG_RCU_STALL
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-       rcp->gp_check = get_seconds() + 3;
-}
-static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-       int cpu;
-       long delta;
-
-       /* Only let one CPU complain about others per time interval. */
-
-       spin_lock(&rcp->lock);
-       delta = get_seconds() - rcp->gp_check;
-       if (delta < 2L ||
-           cpus_empty(rcp->cpumask)) {
-               spin_unlock(&rcp->lock);
-               return;
-       rcp->gp_check = get_seconds() + 30;
-       }
-       spin_unlock(&rcp->lock);
-
-       /* OK, time to rat on our buddy... */
-
-       printk(KERN_ERR "RCU detected CPU stalls:");
-       for_each_cpu_mask(cpu, rcp->cpumask)
-               printk(" %d", cpu);
-       printk(" (detected by %d, t=%lu/%lu)\n",
-              smp_processor_id(), get_seconds(), rcp->gp_check);
-}
-static void print_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-       printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
-                       smp_processor_id(), get_seconds(), rcp->gp_check);
-       dump_stack();
-       spin_lock(&rcp->lock);
-       if ((long)(get_seconds() - rcp->gp_check) >= 0L)
-               rcp->gp_check = get_seconds() + 30;
-       spin_unlock(&rcp->lock);
-}
-static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
-                                  struct rcu_data *rdp)
-{
-       long delta;
-
-       delta = get_seconds() - rcp->gp_check;
-       if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
-
-               /* We haven't checked in, so go dump stack. */
-
-               print_cpu_stall(rcp);
-
-       } else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
-
-               /* They had two seconds to dump stack, so complain. */
-
-               print_other_cpu_stall(rcp);
-
-       }
-}
-
-#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-}
-static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
-                                  struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
-
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -372,7 +404,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
        if (rcp->cur != rcp->pending &&
                        rcp->completed == rcp->cur) {
                rcp->cur++;
-               record_gp_check_time(rcp);
+               record_gp_stall_check_time(rcp);
 
                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -381,7 +413,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
                 * unnecessarily.
                 */
                smp_mb();
-               cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+               cpumask_andnot(to_cpumask(rcp->cpumask),
+                              cpu_online_mask, nohz_cpu_mask);
 
                rcp->signaled = 0;
        }
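
The grace-period bookkeeping in rcu_start_batch() is a three-counter scheme: rcp->cur is the batch currently running, rcp->completed the last one finished, and rcp->pending the most recent batch some CPU has requested; cur is advanced only when cur == completed and pending is ahead of it. The batch comparisons used in the hunks below rely on signed-difference helpers, which live in include/linux/rcuclassic.h rather than in this patch and look roughly like:

    /* Is batch a before/after batch b?  Signed subtraction copes with wrap. */
    static inline int rcu_batch_before(long a, long b)
    {
            return (a - b) < 0;
    }

    static inline int rcu_batch_after(long a, long b)
    {
            return (a - b) > 0;
    }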
@@ -394,8 +427,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  */
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-       cpu_clear(cpu, rcp->cpumask);
-       if (cpus_empty(rcp->cpumask)) {
+       cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
+       if (cpumask_empty(to_cpumask(rcp->cpumask))) {
                /* batch completed ! */
                rcp->completed = rcp->cur;
                rcu_start_batch(rcp);
@@ -410,6 +443,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
+       unsigned long flags;
+
        if (rdp->quiescbatch != rcp->cur) {
                /* start new grace period: */
                rdp->qs_pending = 1;
@@ -433,7 +468,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                return;
        rdp->qs_pending = 0;
 
-       spin_lock(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        /*
         * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
         * during cpu startup. Ignore the quiescent state.
@@ -441,7 +476,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
        if (likely(rdp->quiescbatch == rcp->cur))
                cpu_quiet(rdp->cpu, rcp);
 
-       spin_unlock(&rcp->lock);
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 
@@ -454,33 +489,36 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                                struct rcu_head **tail, long batch)
 {
+       unsigned long flags;
+
        if (list) {
-               local_irq_disable();
+               local_irq_save(flags);
                this_rdp->batch = batch;
                *this_rdp->nxttail[2] = list;
                this_rdp->nxttail[2] = tail;
-               local_irq_enable();
+               local_irq_restore(flags);
        }
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
-       /* if the cpu going offline owns the grace period
+       unsigned long flags;
+
+       /*
+        * if the cpu going offline owns the grace period
         * we can block indefinitely waiting for it, so flush
         * it here
         */
-       spin_lock_bh(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        if (rcp->cur != rcp->completed)
                cpu_quiet(rdp->cpu, rcp);
-       spin_unlock_bh(&rcp->lock);
-       /* spin_lock implies smp_mb() */
        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
        rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
+       spin_unlock(&rcp->lock);
 
-       local_irq_disable();
        this_rdp->qlen += rdp->qlen;
-       local_irq_enable();
+       local_irq_restore(flags);
 }
 
 static void rcu_offline_cpu(int cpu)
@@ -510,16 +548,20 @@ static void rcu_offline_cpu(int cpu)
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
+       unsigned long flags;
+       long completed_snap;
+
        if (rdp->nxtlist) {
-               local_irq_disable();
+               local_irq_save(flags);
+               completed_snap = ACCESS_ONCE(rcp->completed);
 
                /*
                 * move the other grace-period-completed entries to
                 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
                 */
-               if (!rcu_batch_before(rcp->completed, rdp->batch))
+               if (!rcu_batch_before(completed_snap, rdp->batch))
                        rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
-               else if (!rcu_batch_before(rcp->completed, rdp->batch - 1))
+               else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
                        rdp->nxttail[0] = rdp->nxttail[1];
 
                /*
@@ -540,16 +582,18 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                        rdp->nxttail[0] = &rdp->nxtlist;
                }
 
-               local_irq_enable();
+               local_irq_restore(flags);
 
                if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                       unsigned long flags2;
+
                        /* and start it/schedule start if it's a new batch */
-                       spin_lock(&rcp->lock);
+                       spin_lock_irqsave(&rcp->lock, flags2);
                        if (rcu_batch_after(rdp->batch, rcp->pending)) {
                                rcp->pending = rdp->batch;
                                rcu_start_batch(rcp);
                        }
-                       spin_unlock(&rcp->lock);
+                       spin_unlock_irqrestore(&rcp->lock, flags2);
                }
        }
 
@@ -560,23 +604,41 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+       /*
+        * Memory references from any prior RCU read-side critical sections
+        * executed by the interrupted code must be seen before any RCU
+        * grace-period manipulations below.
+        */
+
+       smp_mb(); /* See above block comment. */
+
        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+
+       /*
+        * Memory references from any later RCU read-side critical sections
+        * executed by the interrupted code must be seen after any RCU
+        * grace-period manipulations above.
+        */
+
+       smp_mb(); /* See above block comment. */
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
        /* Check for CPU stalls, if enabled. */
-       check_cpu_stall(rcp, rdp);
+       check_cpu_stall(rcp);
 
        if (rdp->nxtlist) {
+               long completed_snap = ACCESS_ONCE(rcp->completed);
+
                /*
                 * This cpu has pending rcu entries and the grace period
                 * for them has completed.
                 */
-               if (!rcu_batch_before(rcp->completed, rdp->batch))
+               if (!rcu_batch_before(completed_snap, rdp->batch))
                        return 1;
-               if (!rcu_batch_before(rcp->completed, rdp->batch - 1) &&
+               if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
                                rdp->nxttail[0] != rdp->nxttail[1])
                        return 1;
                if (rdp->nxttail[0] != &rdp->nxtlist)
@@ -627,11 +689,17 @@ int rcu_needs_cpu(int cpu)
        return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
 }
 
+/*
+ * Top-level function driving RCU grace-period detection, normally
+ * invoked from the scheduler-clock interrupt.  This function simply
+ * increments counters that are read only from softirq by this same
+ * CPU, so there are no memory barriers required.
+ */
 void rcu_check_callbacks(int cpu, int user)
 {
        if (user ||
-           (idle_cpu(cpu) && !in_softirq() &&
-                               hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+           (idle_cpu(cpu) && rcu_scheduler_active &&
+            !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 
                /*
                 * Get here if this CPU took its interrupt from user
@@ -667,9 +735,12 @@ void rcu_check_callbacks(int cpu, int user)
        raise_rcu_softirq();
 }
 
-static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
+static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                struct rcu_data *rdp)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&rcp->lock, flags);
        memset(rdp, 0, sizeof(*rdp));
        rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
        rdp->donetail = &rdp->donelist;
@@ -677,6 +748,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
        rdp->qs_pending = 0;
        rdp->cpu = cpu;
        rdp->blimit = blimit;
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 static void __cpuinit rcu_online_cpu(int cpu)
@@ -721,6 +793,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
  */
 void __init __rcu_init(void)
 {
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+       printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
        rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
        /* Register notifier for non-boot CPUs */
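
Finally, the stall-detection code added above is compiled in only when CONFIG_RCU_CPU_STALL_DETECTOR is set, so a classic-RCU kernel built to exercise it needs roughly the following configuration (illustrative .config fragment):

    CONFIG_CLASSIC_RCU=y
    CONFIG_RCU_CPU_STALL_DETECTOR=y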