sh: convert /proc/cpu/alignment, /proc/cpu/kernel_alignment to seq_file
[safe/jmp/linux-2.6] / kernel / rcutree_plugin.h
index cd2ab67..ef2a58c 100644 (file)
@@ -64,22 +64,31 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  */
-static void rcu_preempt_qs_record(int cpu)
+static void rcu_preempt_qs(int cpu)
 {
        struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-       rdp->passed_quiesc = 1;
        rdp->passed_quiesc_completed = rdp->completed;
+       barrier();
+       rdp->passed_quiesc = 1;
 }
 
 /*
- * We have entered the scheduler or are between softirqs in ksoftirqd.
- * If we are in an RCU read-side critical section, we need to reflect
- * that in the state of the rcu_node structure corresponding to this CPU.
- * Caller must disable hardirqs.
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from.  If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the appropriate entry
+ * of the blocked_tasks[] array.  The task will dequeue itself when
+ * it exits the outermost enclosing RCU read-side critical section.
+ * Therefore, the current grace period cannot be permitted to complete
+ * until the blocked_tasks[] entry indexed by the low-order bit of
+ * rnp->gpnum empties.
+ *
+ * Caller must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
        struct task_struct *t = current;
+       unsigned long flags;
        int phase;
        struct rcu_data *rdp;
        struct rcu_node *rnp;
@@ -90,9 +99,9 @@ static void rcu_preempt_qs(int cpu)
                /* Possibly blocking in an RCU read-side critical section. */
                rdp = rcu_preempt_state.rda[cpu];
                rnp = rdp->mynode;
-               spin_lock(&rnp->lock);
+               spin_lock_irqsave(&rnp->lock, flags);
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
-               t->rcu_blocked_cpu = cpu;
+               t->rcu_blocked_node = rnp;
 
                /*
                 * If this CPU has already checked in, then this task
@@ -103,11 +112,15 @@ static void rcu_preempt_qs(int cpu)
                 * state for the current grace period), then as long
                 * as that task remains queued, the current grace period
                 * cannot end.
+                *
+                * But first, note that the current CPU must still be
+                * online!
                 */
-               phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
+               WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+               WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
+               phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
                list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
-               smp_mb();  /* Ensure later ctxt swtch seen after above. */
-               spin_unlock(&rnp->lock);
+               spin_unlock_irqrestore(&rnp->lock, flags);
        }
 
        /*
@@ -119,9 +132,10 @@ static void rcu_preempt_qs(int cpu)
         * grace period, then the fact that the task has been enqueued
         * means that we continue to block the current grace period.
         */
-       rcu_preempt_qs_record(cpu);
-       t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
-                                       RCU_READ_UNLOCK_GOT_QS);
+       rcu_preempt_qs(cpu);
+       local_irq_save(flags);
+       t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+       local_irq_restore(flags);
 }
 
 /*
@@ -136,6 +150,16 @@ void __rcu_read_lock(void)
 }
 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 
+/*
+ * Check for preempted RCU readers blocking the current grace period
+ * for the specified rcu_node structure.  If the caller needs a reliable
+ * answer, it must hold the rcu_node's ->lock.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+       return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+}
+
 static void rcu_read_unlock_special(struct task_struct *t)
 {
        int empty;
@@ -157,7 +181,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
        special = t->rcu_read_unlock_special;
        if (special & RCU_READ_UNLOCK_NEED_QS) {
                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
-               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
+               rcu_preempt_qs(smp_processor_id());
        }
 
        /* Hardware IRQ handlers cannot block. */
@@ -170,12 +194,21 @@ static void rcu_read_unlock_special(struct task_struct *t)
        if (special & RCU_READ_UNLOCK_BLOCKED) {
                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 
-               /* Remove this task from the list it blocked on. */
-               rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode;
-               spin_lock(&rnp->lock);
-               empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+               /*
+                * Remove this task from the list it blocked on.  The
+                * task can migrate while we acquire the lock, but at
+                * most one time.  So at most two passes through the loop.
+                */
+               for (;;) {
+                       rnp = t->rcu_blocked_node;
+                       spin_lock(&rnp->lock);  /* irqs already disabled. */
+                       if (rnp == t->rcu_blocked_node)
+                               break;
+                       spin_unlock(&rnp->lock);  /* irqs remain disabled. */
+               }
+               empty = !rcu_preempted_readers(rnp);
                list_del_init(&t->rcu_node_entry);
-               t->rcu_blocked_cpu = -1;
+               t->rcu_blocked_node = NULL;
 
                /*
                 * If this was the last task on the current list, and if
@@ -184,10 +217,9 @@ static void rcu_read_unlock_special(struct task_struct *t)
                 * drop rnp->lock and restore irq.
                 */
                if (!empty && rnp->qsmask == 0 &&
-                   list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
-                       t->rcu_read_unlock_special &=
-                               ~(RCU_READ_UNLOCK_NEED_QS |
-                                 RCU_READ_UNLOCK_GOT_QS);
+                   !rcu_preempted_readers(rnp)) {
+                       struct rcu_node *rnp_p;
+
                        if (rnp->parent == NULL) {
                                /* Only one rcu_node in the tree. */
                                cpu_quiet_msk_finish(&rcu_preempt_state, flags);
@@ -196,9 +228,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
                        /* Report up the rest of the hierarchy. */
                        mask = rnp->grpmask;
                        spin_unlock_irqrestore(&rnp->lock, flags);
-                       rnp = rnp->parent;
-                       spin_lock_irqsave(&rnp->lock, flags);
-                       cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
+                       rnp_p = rnp->parent;
+                       spin_lock_irqsave(&rnp_p->lock, flags);
+                       WARN_ON_ONCE(rnp->qsmask);
+                       cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
                        return;
                }
                spin_unlock(&rnp->lock);
@@ -234,12 +267,12 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 {
        unsigned long flags;
        struct list_head *lp;
-       int phase = rnp->gpnum & 0x1;
+       int phase;
        struct task_struct *t;
 
-       if (!list_empty(&rnp->blocked_tasks[phase])) {
+       if (rcu_preempted_readers(rnp)) {
                spin_lock_irqsave(&rnp->lock, flags);
-               phase = rnp->gpnum & 0x1; /* re-read under lock. */
+               phase = rnp->gpnum & 0x1;
                lp = &rnp->blocked_tasks[phase];
                list_for_each_entry(t, lp, rcu_node_entry)
                        printk(" P%d", t->pid);
@@ -250,16 +283,84 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /*
- * Check for preempted RCU readers for the specified rcu_node structure.
- * If the caller needs a reliable answer, it must hold the rcu_node's
- * >lock.
+ * Check that the list of blocked tasks for the newly completed grace
+ * period is in fact empty.  It is a serious bug to complete a grace
+ * period that still has RCU readers blocked!  This function must be
+ * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
+ * must be held by the caller.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
-       return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+       WARN_ON_ONCE(rcu_preempted_readers(rnp));
+       WARN_ON_ONCE(rnp->qsmask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Handle tasklist migration for the case in which all CPUs covered by
+ * the specified rcu_node have gone offline.  Move the blocked tasks up
+ * to the root rcu_node.  The reason for not just moving them to the
+ * immediate parent is to remove the need for rcu_read_unlock_special()
+ * to make more than two attempts to acquire the target rcu_node's lock.
+ *
+ * Returns 1 if there was previously a task blocking the current grace
+ * period on the specified rcu_node structure.
+ *
+ * The caller must hold rnp->lock with irqs disabled.
+ */
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp)
+{
+       int i;
+       struct list_head *lp;
+       struct list_head *lp_root;
+       int retval = rcu_preempted_readers(rnp);
+       struct rcu_node *rnp_root = rcu_get_root(rsp);
+       struct task_struct *tp;
+
+       if (rnp == rnp_root) {
+               WARN_ONCE(1, "Last CPU thought to be offlined?");
+               return 0;  /* Shouldn't happen: at least one CPU online. */
+       }
+       WARN_ON_ONCE(rnp != rdp->mynode &&
+                    (!list_empty(&rnp->blocked_tasks[0]) ||
+                     !list_empty(&rnp->blocked_tasks[1])));
+
+       /*
+        * Move tasks up to root rcu_node.  Rely on the fact that the
+        * root rcu_node can be at most one ahead of the rest of the
+        * rcu_nodes in terms of ->gpnum value.  This fact allows us to
+        * move the blocked_tasks[] array directly, element by element.
+        */
+       for (i = 0; i < 2; i++) {
+               lp = &rnp->blocked_tasks[i];
+               lp_root = &rnp_root->blocked_tasks[i];
+               while (!list_empty(lp)) {
+                       tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
+                       spin_lock(&rnp_root->lock); /* irqs already disabled */
+                       list_del(&tp->rcu_node_entry);
+                       tp->rcu_blocked_node = rnp_root;
+                       list_add(&tp->rcu_node_entry, lp_root);
+                       spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+               }
+       }
+
+       return retval;
 }
 
 /*
+ * Do CPU-offline processing for preemptable RCU.
+ */
+static void rcu_preempt_offline_cpu(int cpu)
+{
+       __rcu_offline_cpu(cpu, &rcu_preempt_state);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the corresponding CPU's rcu_node structure,
  * which is checked elsewhere.
@@ -271,20 +372,12 @@ static void rcu_preempt_check_callbacks(int cpu)
        struct task_struct *t = current;
 
        if (t->rcu_read_lock_nesting == 0) {
-               t->rcu_read_unlock_special &=
-                       ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
-               rcu_preempt_qs_record(cpu);
+               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+               rcu_preempt_qs(cpu);
                return;
        }
-       if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
-               if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
-                       rcu_preempt_qs_record(cpu);
-                       t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
-               } else if (!(t->rcu_read_unlock_special &
-                            RCU_READ_UNLOCK_NEED_QS)) {
-                       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
-               }
-       }
+       if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+               t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -306,6 +399,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 EXPORT_SYMBOL_GPL(call_rcu);
 
 /*
+ * Wait for an rcu-preempt grace period.  We are supposed to expedite the
+ * grace period, but this is the crude slow compatibility hack, so just
+ * invoke synchronize_rcu().
+ */
+void synchronize_rcu_expedited(void)
+{
+       synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
+/*
  * Check to see if there is any immediate preemptable-RCU-related work
  * to be done.
  */
@@ -323,6 +427,15 @@ static int rcu_preempt_needs_cpu(int cpu)
        return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 }
 
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+       _rcu_barrier(&rcu_preempt_state, call_rcu);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Initialize preemptable RCU's per-CPU data.
  */
@@ -332,6 +445,22 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
+ * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+       rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+}
+
+/*
+ * Initialize preemptable RCU's state structures.
+ */
+static void __init __rcu_init_preempt(void)
+{
+       RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
+}
+
+/*
  * Check for a task exiting while in a preemptable-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
  * as debug_check_no_locks_held() already does this if lockdep
@@ -370,10 +499,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 }
 
+/*
+ * Because preemptable RCU does not exist, there are never any preempted
+ * RCU readers.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+       return 0;
+}
+
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
 /*
@@ -387,19 +525,45 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /*
- * Because preemptable RCU does not exist, there are never any preempted
- * RCU readers.
+ * Because there is no preemptable RCU, there can be no readers blocked,
+ * so there is no need to check for blocked tasks.  So check only for
+ * bogus qsmask values.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+       WARN_ON_ONCE(rnp->qsmask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptable RCU does not exist, it never needs to migrate
+ * tasks that were blocked within RCU read-side critical sections, and
+ * such non-existent tasks cannot possibly have been blocking the current
+ * grace period.
+ */
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp)
 {
        return 0;
 }
 
 /*
+ * Because preemptable RCU does not exist, it never needs CPU-offline
+ * processing.
+ */
+static void rcu_preempt_offline_cpu(int cpu)
+{
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
  * Because preemptable RCU does not exist, it never has any callbacks
  * to check.
  */
-void rcu_preempt_check_callbacks(int cpu)
+static void rcu_preempt_check_callbacks(int cpu)
 {
 }
 
@@ -407,7 +571,7 @@ void rcu_preempt_check_callbacks(int cpu)
  * Because preemptable RCU does not exist, it never has any callbacks
  * to process.
  */
-void rcu_preempt_process_callbacks(void)
+static void rcu_preempt_process_callbacks(void)
 {
 }
 
@@ -421,6 +585,16 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 EXPORT_SYMBOL_GPL(call_rcu);
 
 /*
+ * Wait for an rcu-preempt grace period, but make it happen quickly.
+ * But because preemptable RCU does not exist, map to rcu-sched.
+ */
+void synchronize_rcu_expedited(void)
+{
+       synchronize_sched_expedited();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
+/*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
 static int rcu_preempt_pending(int cpu)
@@ -437,6 +611,16 @@ static int rcu_preempt_needs_cpu(int cpu)
 }
 
 /*
+ * Because preemptable RCU does not exist, rcu_barrier() is just
+ * another name for rcu_barrier_sched().
+ */
+void rcu_barrier(void)
+{
+       rcu_barrier_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/*
  * Because preemptable RCU does not exist, there is no per-CPU
  * data to initialize.
  */
@@ -444,4 +628,18 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 {
 }
 
+/*
+ * Because there is no preemptable RCU, there are no callbacks to move.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, it need not be initialized.
+ */
+static void __init __rcu_init_preempt(void)
+{
+}
+
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */