+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed. RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+ struct rcu_synchronize rcu;
+
+ if (!rcu_scheduler_active)
+ return;
+
+ init_completion(&rcu.completion);
+ /* Will wake me after RCU finished. */
+ call_rcu(&rcu.head, wakeme_after_rcu);
+ /* Wait for it. */
+ wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+ return !list_empty(&rnp->blocked_tasks[2]) ||
+ !list_empty(&rnp->blocked_tasks[3]);
+}
+
+/*
+ * return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period. Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+ return !rcu_preempted_readers_exp(rnp) &&
+ ACCESS_ONCE(rnp->expmask) == 0;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period. This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree. (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+ unsigned long flags;
+ unsigned long mask;
+
+ spin_lock_irqsave(&rnp->lock, flags);
+ for (;;) {
+ if (!sync_rcu_preempt_exp_done(rnp))
+ break;
+ if (rnp->parent == NULL) {
+ wake_up(&sync_rcu_preempt_exp_wq);
+ break;
+ }
+ mask = rnp->grpmask;
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
+ rnp = rnp->parent;
+ spin_lock(&rnp->lock); /* irqs already disabled */
+ rnp->expmask &= ~mask;
+ }
+ spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure. If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+ int must_wait;
+
+ spin_lock(&rnp->lock); /* irqs already disabled */
+ list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
+ list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
+ must_wait = rcu_preempted_readers_exp(rnp);
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
+ if (!must_wait)
+ rcu_report_exp_rnp(rsp, rnp);
+}
+
+/*
+ * Wait for an rcu-preempt grace period, but expedite it. The basic idea
+ * is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blocked_tasks[] lists, move all entries from the first set of
+ * ->blocked_tasks[] lists to the second set, and finally wait for this
+ * second set to drain.
+ */
+void synchronize_rcu_expedited(void)
+{
+ unsigned long flags;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp = &rcu_preempt_state;
+ long snap;
+ int trycount = 0;
+
+ smp_mb(); /* Caller's modifications seen first by other CPUs. */
+ snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+ smp_mb(); /* Above access cannot bleed into critical section. */
+
+ /*
+ * Acquire lock, falling back to synchronize_rcu() if too many
+ * lock-acquisition failures. Of course, if someone does the
+ * expedited grace period for us, just leave.
+ */
+ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+ if (trycount++ < 10)
+ udelay(trycount * num_online_cpus());
+ else {
+ synchronize_rcu();
+ return;
+ }
+ if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+ goto mb_ret; /* Others did our work for us. */
+ }
+ if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+ goto unlock_mb_ret; /* Others did our work for us. */
+
+ /* force all RCU readers onto blocked_tasks[]. */
+ synchronize_sched_expedited();
+
+ spin_lock_irqsave(&rsp->onofflock, flags);
+
+ /* Initialize ->expmask for all non-leaf rcu_node structures. */
+ rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
+ spin_lock(&rnp->lock); /* irqs already disabled. */
+ rnp->expmask = rnp->qsmaskinit;
+ spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ }
+
+ /* Snapshot current state of ->blocked_tasks[] lists. */
+ rcu_for_each_leaf_node(rsp, rnp)
+ sync_rcu_preempt_exp_init(rsp, rnp);
+ if (NUM_RCU_NODES > 1)
+ sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+
+ spin_unlock_irqrestore(&rsp->onofflock, flags);
+
+ /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+ rnp = rcu_get_root(rsp);
+ wait_event(sync_rcu_preempt_exp_wq,
+ sync_rcu_preempt_exp_done(rnp));
+
+ /* Clean up and exit. */
+ smp_mb(); /* ensure expedited GP seen before counter increment. */
+ ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+unlock_mb_ret:
+ mutex_unlock(&sync_rcu_preempt_exp_mutex);
+mb_ret:
+ smp_mb(); /* ensure subsequent action seen after grace period. */
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+