X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Frcutree_plugin.h;h=0e4f420245d97369b7fdf6bb99815789ffe13233;hb=a0de055cf61338549b13079a5677ef2e1b6472ef;hp=c0cb783aa16af58abb1131449183508bbd9a3402;hpb=e74f4c4564455c91a3b4075bb1721993c2a95dda;p=safe%2Fjmp%2Flinux-2.6

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c0cb783..0e4f420 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,19 +24,61 @@
  * Paul E. McKenney
  */

+#include <linux/delay.h>
+
+/*
+ * Check the RCU kernel configuration parameters and print informative
+ * messages about anything out of the ordinary.  If you like #ifdef, you
+ * will love this function.
+ */
+static void __init rcu_bootup_announce_oddness(void)
+{
+#ifdef CONFIG_RCU_TRACE
+	printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
+#endif
+#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
+	printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+	       CONFIG_RCU_FANOUT);
+#endif
+#ifdef CONFIG_RCU_FANOUT_EXACT
+	printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
+#endif
+#ifdef CONFIG_RCU_FAST_NO_HZ
+	printk(KERN_INFO
+	       "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+#endif
+#ifdef CONFIG_PROVE_RCU
+	printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
+#endif
+#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
+	printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
+#endif
+#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
+	printk(KERN_INFO
+	       "\tRCU-based detection of stalled CPUs is disabled.\n");
+#endif
+#ifndef CONFIG_RCU_CPU_STALL_VERBOSE
+	printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
+#endif
+#if NUM_RCU_LVL_4 != 0
+	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
+#endif
+}

 #ifdef CONFIG_TREE_PREEMPT_RCU

 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);

+static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+
 /*
  * Tell them what RCU they are running.
  */
-static inline void rcu_bootup_announce(void)
+static void __init rcu_bootup_announce(void)
 {
-	printk(KERN_INFO
-	       "Experimental preemptable hierarchical RCU implementation.\n");
+	printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
+	rcu_bootup_announce_oddness();
 }

 /*
@@ -59,17 +101,32 @@ long rcu_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed);

 /*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
  */
 static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-	rdp->passed_quiesc_completed = rdp->completed;
+
+	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
+	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }

 /*
@@ -99,7 +156,7 @@ static void rcu_preempt_note_context_switch(int cpu)

 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = rcu_preempt_state.rda[cpu];
 		rnp = rdp->mynode;
-		spin_lock_irqsave(&rnp->lock, flags);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 		t->rcu_blocked_node = rnp;

@@ -120,7 +177,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
-		spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}

 	/*
@@ -132,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs(cpu);
 	local_irq_save(flags);
-	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+	rcu_preempt_qs(cpu);
 	local_irq_restore(flags);
 }

@@ -157,14 +213,58 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+	int phase = rnp->gpnum & 0x1;
+
+	return !list_empty(&rnp->blocked_tasks[phase]) ||
+	       !list_empty(&rnp->blocked_tasks[phase + 2]);
 }

+/*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period.  The caller must hold the specified rnp->lock with
+ * irqs disabled, and this lock is released upon return, but irqs remain
+ * disabled.
+ */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+	__releases(rnp->lock)
+{
+	unsigned long mask;
+	struct rcu_node *rnp_p;
+
+	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;  /* Still need more quiescent states! */
+	}
+
+	rnp_p = rnp->parent;
+	if (rnp_p == NULL) {
+		/*
+		 * Either there is only one rcu_node in the tree,
+		 * or tasks were kicked up to root rcu_node due to
+		 * CPUs going offline.
+		 */
+		rcu_report_qs_rsp(&rcu_preempt_state, flags);
+		return;
+	}
+
+	/* Report up the rest of the hierarchy. */
+	mask = rnp->grpmask;
+	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
+	rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
 static void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
+	int empty_exp;
 	unsigned long flags;
-	unsigned long mask;
 	struct rcu_node *rnp;
 	int special;

@@ -180,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	special = t->rcu_read_unlock_special;
 	if (special & RCU_READ_UNLOCK_NEED_QS) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 		rcu_preempt_qs(smp_processor_id());
 	}

@@ -201,42 +300,36 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	for (;;) {
 		rnp = t->rcu_blocked_node;
-		spin_lock(&rnp->lock);  /* irqs already disabled. */
+		raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 		if (rnp == t->rcu_blocked_node)
 			break;
-		spin_unlock(&rnp->lock);  /* irqs remain disabled. */
+		raw_spin_unlock(&rnp->lock);  /* irqs remain disabled. */
 	}
 	empty = !rcu_preempted_readers(rnp);
+	empty_exp = !rcu_preempted_readers_exp(rnp);
+	smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 	list_del_init(&t->rcu_node_entry);
 	t->rcu_blocked_node = NULL;

 	/*
 	 * If this was the last task on the current list, and if
 	 * we aren't waiting on any CPUs, report the quiescent state.
-	 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
-	 * drop rnp->lock and restore irq.
+	 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 	 */
-	if (!empty && rnp->qsmask == 0 &&
-	    !rcu_preempted_readers(rnp)) {
-		struct rcu_node *rnp_p;
-
-		if (rnp->parent == NULL) {
-			/* Only one rcu_node in the tree. */
-			cpu_quiet_msk_finish(&rcu_preempt_state, flags);
-			return;
-		}
-		/* Report up the rest of the hierarchy. */
-		mask = rnp->grpmask;
-		spin_unlock_irqrestore(&rnp->lock, flags);
-		rnp_p = rnp->parent;
-		spin_lock_irqsave(&rnp_p->lock, flags);
-		WARN_ON_ONCE(rnp->qsmask);
-		cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
-		return;
-	}
-	spin_unlock(&rnp->lock);
+	if (empty)
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	else
+		rcu_report_unblock_qs_rnp(rnp, flags);
+
+	/*
+	 * If this was the last task on the expedited lists,
+	 * then we need to report up the rcu_node hierarchy.
+	 */
+	if (!empty_exp && !rcu_preempted_readers_exp(rnp))
+		rcu_report_exp_rnp(&rcu_preempt_state, rnp);
+	} else {
+		local_irq_restore(flags);
 	}
-	local_irq_restore(flags);
 }

 /*
@@ -254,29 +347,73 @@ void __rcu_read_unlock(void)
 	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 		rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);

 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR

+#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct list_head *lp;
+	int phase;
+	struct task_struct *t;
+
+	if (rcu_preempted_readers(rnp)) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		phase = rnp->gpnum & 0x1;
+		lp = &rnp->blocked_tasks[phase];
+		list_for_each_entry(t, lp, rcu_node_entry)
+			sched_show_task(t);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	rcu_print_detail_task_stall_rnp(rnp);
+	rcu_for_each_leaf_node(rsp, rnp)
+		rcu_print_detail_task_stall_rnp(rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
  * sections, printing out the tid of each.
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	unsigned long flags;
 	struct list_head *lp;
 	int phase;
 	struct task_struct *t;

 	if (rcu_preempted_readers(rnp)) {
-		spin_lock_irqsave(&rnp->lock, flags);
 		phase = rnp->gpnum & 0x1;
 		lp = &rnp->blocked_tasks[phase];
 		list_for_each_entry(t, lp, rcu_node_entry)
 			printk(" P%d", t->pid);
-		spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 }

@@ -303,26 +440,34 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
  * rcu_node.  The reason for not just moving them to the immediate
  * parent is to remove the need for rcu_read_unlock_special() to
  * make more than two attempts to acquire the target rcu_node's lock.
+ * Returns true if there were tasks blocking the current RCU grace
+ * period.
+ *
+ * Returns 1 if there was previously a task blocking the current grace
+ * period on the specified rcu_node structure.
  *
  * The caller must hold rnp->lock with irqs disabled.
  */
-static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				      struct rcu_node *rnp,
-				      struct rcu_data *rdp)
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+				     struct rcu_node *rnp,
+				     struct rcu_data *rdp)
 {
 	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
+	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
 	struct task_struct *tp;

 	if (rnp == rnp_root) {
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
-		return;  /* Shouldn't happen: at least one CPU online. */
+		return 0;  /* Shouldn't happen: at least one CPU online. */
 	}
 	WARN_ON_ONCE(rnp != rdp->mynode &&
 		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1])));
+		      !list_empty(&rnp->blocked_tasks[1]) ||
+		      !list_empty(&rnp->blocked_tasks[2]) ||
+		      !list_empty(&rnp->blocked_tasks[3])));

 	/*
 	 * Move tasks up to root rcu_node.  Rely on the fact that the
 	 *
@@ -330,18 +475,23 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	 * rcu_nodes in terms of gp_num value.  This fact allows us to
 	 * move the blocked_tasks[] array directly, element by element.
 	 */
-	for (i = 0; i < 2; i++) {
+	if (rcu_preempted_readers(rnp))
+		retval |= RCU_OFL_TASKS_NORM_GP;
+	if (rcu_preempted_readers_exp(rnp))
+		retval |= RCU_OFL_TASKS_EXP_GP;
+	for (i = 0; i < 4; i++) {
 		lp = &rnp->blocked_tasks[i];
 		lp_root = &rnp_root->blocked_tasks[i];
 		while (!list_empty(lp)) {
 			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			spin_lock(&rnp_root->lock); /* irqs already disabled */
+			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 			list_del(&tp->rcu_node_entry);
 			tp->rcu_blocked_node = rnp_root;
 			list_add(&tp->rcu_node_entry, lp_root);
-			spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 		}
 	}
+	return retval;
 }

 /*
@@ -366,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;

 	if (t->rcu_read_lock_nesting == 0) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 		rcu_preempt_qs(cpu);
 		return;
 	}
@@ -392,6 +541,188 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);

+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (!rcu_scheduler_active)
+		return;
+
+	init_rcu_head_on_stack(&rcu.head);
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blocked_tasks[2]) ||
+	       !list_empty(&rnp->blocked_tasks[3]);
+}
+
+/*
+ * return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period.  Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+	return !rcu_preempted_readers_exp(rnp) &&
+	       ACCESS_ONCE(rnp->expmask) == 0;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period.  This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree.  (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+	unsigned long mask;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	for (;;) {
+		if (!sync_rcu_preempt_exp_done(rnp))
+			break;
+		if (rnp->parent == NULL) {
+			wake_up(&sync_rcu_preempt_exp_wq);
+			break;
+		}
+		mask = rnp->grpmask;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+		rnp = rnp->parent;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled */
+		rnp->expmask &= ~mask;
+	}
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure.  If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int must_wait;
+
+	raw_spin_lock(&rnp->lock); /* irqs already disabled */
+	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
+	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
+	must_wait = rcu_preempted_readers_exp(rnp);
+	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+	if (!must_wait)
+		rcu_report_exp_rnp(rsp, rnp);
+}
+
+/*
+ * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
+ * is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blocked_tasks[] lists, move all entries from the first set of
+ * ->blocked_tasks[] lists to the second set, and finally wait for this
+ * second set to drain.
+ */
+ */ +void synchronize_rcu_expedited(void) +{ + unsigned long flags; + struct rcu_node *rnp; + struct rcu_state *rsp = &rcu_preempt_state; + long snap; + int trycount = 0; + + smp_mb(); /* Caller's modifications seen first by other CPUs. */ + snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; + smp_mb(); /* Above access cannot bleed into critical section. */ + + /* + * Acquire lock, falling back to synchronize_rcu() if too many + * lock-acquisition failures. Of course, if someone does the + * expedited grace period for us, just leave. + */ + while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_rcu(); + return; + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto mb_ret; /* Others did our work for us. */ + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto unlock_mb_ret; /* Others did our work for us. */ + + /* force all RCU readers onto blocked_tasks[]. */ + synchronize_sched_expedited(); + + raw_spin_lock_irqsave(&rsp->onofflock, flags); + + /* Initialize ->expmask for all non-leaf rcu_node structures. */ + rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->expmask = rnp->qsmaskinit; + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + + /* Snapshot current state of ->blocked_tasks[] lists. */ + rcu_for_each_leaf_node(rsp, rnp) + sync_rcu_preempt_exp_init(rsp, rnp); + if (NUM_RCU_NODES > 1) + sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); + + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + + /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ + rnp = rcu_get_root(rsp); + wait_event(sync_rcu_preempt_exp_wq, + sync_rcu_preempt_exp_done(rnp)); + + /* Clean up and exit. */ + smp_mb(); /* ensure expedited GP seen before counter increment. */ + ACCESS_ONCE(sync_rcu_preempt_exp_count)++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); +mb_ret: + smp_mb(); /* ensure subsequent action seen after grace period. */ +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + /* * Check to see if there is any immediate preemptable-RCU-related work * to be done. @@ -464,9 +795,10 @@ void exit_rcu(void) /* * Tell them what RCU they are running. */ -static inline void rcu_bootup_announce(void) +static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); + rcu_bootup_announce_oddness(); } /* @@ -479,6 +811,16 @@ long rcu_batches_completed(void) EXPORT_SYMBOL_GPL(rcu_batches_completed); /* + * Force a quiescent state for RCU, which, because there is no preemptible + * RCU, becomes the same as rcu-sched. + */ +void rcu_force_quiescent_state(void) +{ + rcu_sched_force_quiescent_state(); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + +/* * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. */ @@ -495,12 +837,30 @@ static int rcu_preempted_readers(struct rcu_node *rnp) return 0; } +#ifdef CONFIG_HOTPLUG_CPU + +/* Because preemptible RCU does not exist, no quieting of tasks. */ +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) +{ + raw_spin_unlock_irqrestore(&rnp->lock, flags); +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR /* * Because preemptable RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. 
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
 }

@@ -521,12 +881,15 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)

 /*
  * Because preemptable RCU does not exist, it never needs to migrate
- * tasks that were blocked within RCU read-side critical sections.
+ * tasks that were blocked within RCU read-side critical sections, and
+ * such non-existent tasks cannot possibly have been blocking the current
+ * grace period.
  */
-static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				      struct rcu_node *rnp,
-				      struct rcu_data *rdp)
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+				     struct rcu_node *rnp,
+				     struct rcu_data *rdp)
 {
+	return 0;
 }

 /*
@@ -565,6 +928,30 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 EXPORT_SYMBOL_GPL(call_rcu);

 /*
+ * Wait for an rcu-preempt grace period, but make it happen quickly.
+ * But because preemptable RCU does not exist, map to rcu-sched.
+ */
+void synchronize_rcu_expedited(void)
+{
+	synchronize_sched_expedited();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptable RCU does not exist, there is never any need to
+ * report on tasks preempted in RCU read-side critical sections during
+ * expedited RCU grace periods.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	return;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
 static int rcu_preempt_pending(int cpu)
@@ -613,3 +1000,123 @@ static void __init __rcu_init_preempt(void)
 }

 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	return rcu_needs_cpu_quick_check(cpu);
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
+ * entry is not configured, so we never do need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
+static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode.  This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	int c = 0;
+	int snap;
+	int snap_nmi;
+	int thatcpu;
+
+	/* Check for being in the holdoff period. */
+	if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+		return rcu_needs_cpu_quick_check(cpu);
+
+	/* Don't bother unless we are the last non-dyntick-idle CPU. */
+	for_each_online_cpu(thatcpu) {
+		if (thatcpu == cpu)
+			continue;
+		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
+		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+		smp_mb(); /* Order sampling of snap with end of grace period. */
+		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+			per_cpu(rcu_dyntick_drain, cpu) = 0;
+			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+			return rcu_needs_cpu_quick_check(cpu);
+		}
+	}
+
+	/* Check and update the rcu_dyntick_drain sequencing. */
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* First time through, initialize the counter. */
+		per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* We have hit the limit, so time to give up. */
+		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+		return rcu_needs_cpu_quick_check(cpu);
+	}
+
+	/* Do one step pushing remaining RCU callbacks through. */
+	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+		rcu_sched_qs(cpu);
+		force_quiescent_state(&rcu_sched_state, 0);
+		c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+	}
+	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+		rcu_bh_qs(cpu);
+		force_quiescent_state(&rcu_bh_state, 0);
+		c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
+	}
+
+	/* If RCU callbacks are still pending, RCU still needs this CPU. */
+	if (c)
+		raise_softirq(RCU_SOFTIRQ);
+	return c;
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long flags;
+
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+		return;
+	local_irq_save(flags);
+	(void)rcu_needs_cpu(cpu);
+	local_irq_restore(flags);
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
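
For readers less familiar with the API this patch touches, the sketch below shows how client code typically uses the preemptible-RCU primitives involved: rcu_read_lock()/rcu_read_unlock() (whose slow path is rcu_read_unlock_special() above) on the read side, and synchronize_rcu() or the synchronize_rcu_expedited() added by this patch on the update side. It is not part of the patch; struct foo, foo_read(), foo_update() and the update-side mutex are hypothetical names, and the sketch assumes the updater serializes writers itself.

	struct foo {
		int a;
	};
	static struct foo *global_foo;	/* hypothetical RCU-protected pointer */

	int foo_read(void)
	{
		struct foo *p;
		int val = -1;

		rcu_read_lock();		/* enter read-side critical section */
		p = rcu_dereference(global_foo);
		if (p)
			val = p->a;
		rcu_read_unlock();		/* may invoke rcu_read_unlock_special() */
		return val;
	}

	int foo_update(int new_a)		/* caller holds hypothetical foo_mutex */
	{
		struct foo *new_p = kmalloc(sizeof(*new_p), GFP_KERNEL);
		struct foo *old_p = global_foo;

		if (!new_p)
			return -ENOMEM;
		new_p->a = new_a;
		rcu_assign_pointer(global_foo, new_p);
		synchronize_rcu_expedited();	/* wait for pre-existing readers; */
						/* synchronize_rcu() also works, but more slowly */
		kfree(old_p);			/* safe: no reader can still hold old_p */
		return 0;
	}

The expedited variant trades extra CPU work (synchronize_sched_expedited() plus the ->blocked_tasks[] bookkeeping shown in the patch) for much lower grace-period latency, so it is meant for update paths where the caller cannot tolerate waiting many milliseconds.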