X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Frcutree.c;h=d4437345706f8a539eee6dba201662fd791978ef;hb=31f46717997a83bdf6db0dd04810c0a329eb3148;hp=41688ff60e0729b8486961b176b18240284e547f;hpb=07079d5357a4d53c2b13126c4a38fb40e6e04966;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 41688ff..d443734 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -54,8 +54,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; -#define RCU_STATE_INITIALIZER(name) { \ - .level = { &name.node[0] }, \ +#define RCU_STATE_INITIALIZER(structname) { \ + .level = { &structname.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. */ \ NUM_RCU_LVL_1, \ @@ -66,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ .orphan_cbs_list = NULL, \ - .orphan_cbs_tail = &name.orphan_cbs_list, \ + .orphan_cbs_tail = &structname.orphan_cbs_list, \ .orphan_qlen = 0, \ - .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ + .name = #structname, \ } struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); @@ -81,8 +82,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); -static int rcu_scheduler_active __read_mostly; - +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -101,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; - rcu_preempt_note_context_switch(cpu); } void rcu_bh_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } +/* + * Note a context switch. This is a quiescent state for RCU-sched, + * and requires special handling for preemptible RCU. + */ +void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(cpu); +} + #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, @@ -157,6 +165,24 @@ long rcu_batches_completed_bh(void) EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); /* + * Force a quiescent state for RCU BH. + */ +void rcu_bh_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_bh_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); + +/* + * Force a quiescent state for RCU-sched. + */ +void rcu_sched_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_sched_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); + +/* * Does the CPU have callbacks ready to be invoked? */ static int @@ -424,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +int rcu_cpu_stall_panicking __read_mostly; + static void record_gp_stall_check_time(struct rcu_state *rsp) { rsp->gp_start = jiffies; @@ -439,10 +467,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* Only let one CPU complain about others per time interval. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); delta = jiffies - rsp->jiffies_stall; if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; @@ -452,23 +480,30 @@ static void print_other_cpu_stall(struct rcu_state *rsp) * due to CPU offlining. */ rcu_print_task_stall(rnp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* OK, time to rat on our buddy... */ - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", + rsp->name); rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); rcu_print_task_stall(rnp); + raw_spin_unlock_irqrestore(&rnp->lock, flags); if (rnp->qsmask == 0) continue; for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) if (rnp->qsmask & (1UL << cpu)) printk(" %d", rnp->grplo + cpu); } - printk(" (detected by %d, t=%ld jiffies)\n", + printk("} (detected by %d, t=%ld jiffies)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start)); trigger_all_cpu_backtrace(); + /* If so configured, complain about tasks blocking the grace period. */ + + rcu_print_detail_task_stall(rsp); + force_quiescent_state(rsp, 0); /* Kick them all. */ } @@ -477,15 +512,15 @@ static void print_cpu_stall(struct rcu_state *rsp) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", - smp_processor_id(), jiffies - rsp->gp_start); + printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", + rsp->name, smp_processor_id(), jiffies - rsp->gp_start); trigger_all_cpu_backtrace(); - spin_lock_irqsave(&rnp->lock, flags); - if ((long)(jiffies - rsp->jiffies_stall) >= 0) + raw_spin_lock_irqsave(&rnp->lock, flags); + if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); set_need_resched(); /* kick ourselves to get things going. */ } @@ -495,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) long delta; struct rcu_node *rnp; + if (rcu_cpu_stall_panicking) + return; delta = jiffies - rsp->jiffies_stall; rnp = rdp->mynode; if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { @@ -509,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) } } +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_panicking = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static void __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); +} + #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void record_gp_stall_check_time(struct rcu_state *rsp) @@ -519,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +static void __init check_cpu_stall_init(void) +{ +} + #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -545,12 +601,12 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ - !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; } __note_new_gpnum(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -609,12 +665,12 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ - !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; } __rcu_process_gp_end(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -660,11 +716,13 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { + if (cpu_needs_another_gp(rsp, rdp)) + rsp->fqs_need_gp = 1; if (rnp->completed == rsp->completed) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* * Propagate new ->completed value to rcu_node structures @@ -672,9 +730,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * of the next grace period to process their callbacks. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->completed = rsp->completed; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } local_irq_restore(flags); return; @@ -695,15 +753,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ rcu_start_gp_per_cpu(rsp, rnp, rdp); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - spin_unlock(&rnp->lock); /* leave irqs disabled. */ + raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ /* Exclude any concurrent CPU-hotplug operations. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ /* * Set the quiescent-state-needed bits in all the rcu_node @@ -723,21 +781,21 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * irqs disabled. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; rnp->completed = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } rnp = rcu_get_root(rsp); - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ - spin_unlock(&rnp->lock); /* irqs remain disabled. */ - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } /* @@ -776,14 +834,14 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, if (!(rnp->qsmask & mask)) { /* Our bit has already been cleared, so done. */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } rnp->qsmask &= ~mask; if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { /* Other bits still set at this level, so done. */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } mask = rnp->grpmask; @@ -793,10 +851,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, break; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); rnp_c = rnp; rnp = rnp->parent; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); WARN_ON_ONCE(rnp_c->qsmask); } @@ -825,7 +883,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las struct rcu_node *rnp; rnp = rdp->mynode; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); if (lastcomp != rnp->completed) { /* @@ -837,12 +895,12 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las * race occurred. */ rdp->passed_quiesc = 0; /* try again later! */ - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } mask = rdp->grpmask; if ((rnp->qsmask & mask) == 0) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { rdp->qs_pending = 0; @@ -906,7 +964,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) if (rdp->nxtlist == NULL) return; /* irqs disabled, so comparison is stable. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ *rsp->orphan_cbs_tail = rdp->nxtlist; rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; rdp->nxtlist = NULL; @@ -914,7 +972,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) rdp->nxttail[i] = &rdp->nxtlist; rsp->orphan_qlen += rdp->qlen; rdp->qlen = 0; - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ } /* @@ -925,10 +983,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) unsigned long flags; struct rcu_data *rdp; - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); rdp = rsp->rda[smp_processor_id()]; if (rsp->orphan_cbs_list == NULL) { - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); return; } *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; @@ -937,7 +995,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) rsp->orphan_cbs_list = NULL; rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; rsp->orphan_qlen = 0; - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } /* @@ -953,23 +1011,23 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) struct rcu_node *rnp; /* Exclude any attempts to start a new grace period. */ - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ mask = rdp->grpmask; /* rnp->grplo is constant. */ do { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { if (rnp != rdp->mynode) - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } if (rnp == rdp->mynode) need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); else - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; rnp = rnp->parent; } while (rnp != NULL); @@ -980,12 +1038,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) * because invoking rcu_report_unblock_qs_rnp() with ->onofflock * held leads to deadlock. */ - spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ rnp = rdp->mynode; if (need_report & RCU_OFL_TASKS_NORM_GP) rcu_report_unblock_qs_rnp(rnp, flags); else - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp); @@ -1103,8 +1161,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { - if (!rcu_pending(cpu)) - return; /* if nothing for RCU to do. */ if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1136,7 +1192,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_bh_qs(cpu); } rcu_preempt_check_callbacks(cpu); - raise_softirq(RCU_SOFTIRQ); + if (rcu_pending(cpu)) + raise_softirq(RCU_SOFTIRQ); } #ifdef CONFIG_SMP @@ -1144,11 +1201,9 @@ void rcu_check_callbacks(int cpu, int user) /* * Scan the leaf rcu_node structures, processing dyntick state for any that * have not yet encountered a quiescent state, using the function specified. - * Returns 1 if the current grace period ends while scanning (possibly - * because we made it end). + * The caller must have suppressed start of new grace periods. */ -static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, - int (*f)(struct rcu_data *)) +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) { unsigned long bit; int cpu; @@ -1158,13 +1213,13 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; - spin_lock_irqsave(&rnp->lock, flags); - if (rnp->completed != lastcomp) { - spin_unlock_irqrestore(&rnp->lock, flags); - return 1; + raw_spin_lock_irqsave(&rnp->lock, flags); + if (!rcu_gp_in_progress(rsp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; } if (rnp->qsmask == 0) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); continue; } cpu = rnp->grplo; @@ -1173,15 +1228,14 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rnp->completed == lastcomp) { + if (mask != 0) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); continue; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } - return 0; } /* @@ -1191,83 +1245,65 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; - long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); - u8 signaled; - u8 forcenow; - u8 gpdone; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ - if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { + if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ return; /* Someone else is already on the job. */ } - if (relaxed && - (long)(rsp->jiffies_force_qs - jiffies) >= 0) + if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; - spin_lock(&rnp->lock); /* irqs already disabled */ - lastcomp = rsp->gpnum - 1; - signaled = rsp->signaled; + raw_spin_lock(&rnp->lock); /* irqs already disabled */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ goto unlock_fqs_ret; /* no GP in progress, time updated. */ } rsp->fqs_active = 1; - switch (signaled) { + switch (rsp->signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - - spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ - /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, lastcomp, - dyntick_save_progress_counter); - spin_lock(&rnp->lock); /* irqs already disabled */ - if (gpdone) - break; - /* fall into next case. */ - - case RCU_SAVE_COMPLETED: + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - /* Update state, record completion counter. */ - forcenow = 0; - if (lastcomp + 1 == rsp->gpnum && - lastcomp == rsp->completed && - rsp->signaled == signaled) { + /* Record dyntick-idle state. */ + force_qs_rnp(rsp, dyntick_save_progress_counter); + raw_spin_lock(&rnp->lock); /* irqs already disabled */ + if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; - rsp->completed_fqs = lastcomp; - forcenow = signaled == RCU_SAVE_COMPLETED; - } - if (!forcenow) - break; - /* fall into next case. */ + break; case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rsp->completed_fqs, - rcu_implicit_dynticks_qs); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ break; } rsp->fqs_active = 0; - spin_unlock(&rnp->lock); /* irqs remain disabled */ + if (rsp->fqs_need_gp) { + raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ + rsp->fqs_need_gp = 0; + rcu_start_gp(rsp, flags); /* releases rnp->lock */ + return; + } + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: - spin_unlock_irqrestore(&rsp->fqslock, flags); + raw_spin_unlock_irqrestore(&rsp->fqslock, flags); } #else /* #ifdef CONFIG_SMP */ @@ -1295,7 +1331,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) * If an RCU GP has gone long enough, go check for dyntick * idle CPUs and, if needed, send resched IPIs. */ - if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) + if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); /* @@ -1309,7 +1345,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* Does this CPU require a not-yet-started grace period? */ if (cpu_needs_another_gp(rsp, rdp)) { - spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); + raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); rcu_start_gp(rsp, flags); /* releases above lock */ } @@ -1340,6 +1376,9 @@ static void rcu_process_callbacks(struct softirq_action *unused) * grace-period manipulations above. */ smp_mb(); /* See above block comment. */ + + /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ + rcu_needs_cpu_flush(); } static void @@ -1374,7 +1413,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), unsigned long nestflag; struct rcu_node *rnp_root = rcu_get_root(rsp); - spin_lock_irqsave(&rnp_root->lock, nestflag); + raw_spin_lock_irqsave(&rnp_root->lock, nestflag); rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ } @@ -1392,7 +1431,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), force_quiescent_state(rsp, 0); rdp->n_force_qs_snap = rsp->n_force_qs; rdp->qlen_last_fqs_check = rdp->qlen; - } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) + } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); local_irq_restore(flags); } @@ -1445,11 +1484,13 @@ void synchronize_sched(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -1469,11 +1510,13 @@ void synchronize_rcu_bh(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_bh(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -1494,8 +1537,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) { + if (rdp->qs_pending && !rdp->passed_quiesc) { + + /* + * If force_quiescent_state() coming soon and this CPU + * needs a quiescent state, and this is either RCU-sched + * or RCU-bh, force a local reschedule. + */ rdp->n_rp_qs_pending++; + if (!rdp->preemptable && + ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, + jiffies)) + set_need_resched(); + } else if (rdp->qs_pending && rdp->passed_quiesc) { + rdp->n_rp_report_qs++; return 1; } @@ -1525,7 +1580,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (rcu_gp_in_progress(rsp) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { rdp->n_rp_need_fqs++; return 1; } @@ -1550,10 +1605,9 @@ static int rcu_pending(int cpu) /* * Check to see if any future RCU-related work will need to be done * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. + * 1 if so. */ -int rcu_needs_cpu(int cpu) +static int rcu_needs_cpu_quick_check(int cpu) { /* RCU callbacks either ready or pending? */ return per_cpu(rcu_sched_data, cpu).nxtlist || @@ -1561,21 +1615,6 @@ int rcu_needs_cpu(int cpu) rcu_preempt_needs_cpu(cpu); } -/* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. - */ -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} - static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -1664,7 +1703,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) @@ -1674,7 +1713,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); #endif /* #ifdef CONFIG_NO_HZ */ rdp->cpu = cpu; - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -1692,7 +1731,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ @@ -1700,7 +1739,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* * A new grace period might start here. If so, we won't be part @@ -1708,14 +1747,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) */ /* Exclude any attempts to start a new GP on large systems. */ - spin_lock(&rsp->onofflock); /* irqs already disabled. */ + raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ /* Add CPU to rcu_node bitmasks. */ rnp = rdp->mynode; mask = rdp->grpmask; do { /* Exclude any attempts to start a new GP on small systems. */ - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit |= mask; mask = rnp->grpmask; if (rnp == rdp->mynode) { @@ -1723,11 +1762,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rdp->completed = rnp->completed; rdp->passed_quiesc_completed = rnp->completed - 1; } - spin_unlock(&rnp->lock); /* irqs already disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } static void __cpuinit rcu_online_cpu(int cpu) @@ -1779,6 +1818,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, } /* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. This function also enables RCU lockdep checking. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + +/* * Compute the per-level fanout, either using the exact fanout specified * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. */ @@ -1811,11 +1865,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) */ static void __init rcu_init_one(struct rcu_state *rsp) { + static char *buf[] = { "rcu_node_level_0", + "rcu_node_level_1", + "rcu_node_level_2", + "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ int cpustride = 1; int i; int j; struct rcu_node *rnp; + BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ + /* Initialize the level-tracking arrays. */ for (i = 1; i < NUM_RCU_LVLS; i++) @@ -1828,8 +1888,9 @@ static void __init rcu_init_one(struct rcu_state *rsp) cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { - spin_lock_init(&rnp->lock); - lockdep_set_class(&rnp->lock, &rcu_node_class[i]); + raw_spin_lock_init(&rnp->lock); + lockdep_set_class_and_name(&rnp->lock, + &rcu_node_class[i], buf[i]); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; @@ -1854,6 +1915,14 @@ static void __init rcu_init_one(struct rcu_state *rsp) INIT_LIST_HEAD(&rnp->blocked_tasks[3]); } } + + rnp = rsp->level[NUM_RCU_LVLS - 1]; + for_each_possible_cpu(i) { + while (i > rnp->grphi) + rnp++; + rsp->rda[i]->mynode = rnp; + rcu_boot_init_percpu_data(i, rsp); + } } /* @@ -1864,32 +1933,18 @@ static void __init rcu_init_one(struct rcu_state *rsp) #define RCU_INIT_FLAVOR(rsp, rcu_data) \ do { \ int i; \ - int j; \ - struct rcu_node *rnp; \ \ - rcu_init_one(rsp); \ - rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ - j = 0; \ for_each_possible_cpu(i) { \ - if (i > rnp[j].grphi) \ - j++; \ - per_cpu(rcu_data, i).mynode = &rnp[j]; \ (rsp)->rda[i] = &per_cpu(rcu_data, i); \ - rcu_boot_init_percpu_data(i, rsp); \ } \ + rcu_init_one(rsp); \ } while (0) void __init rcu_init(void) { - int i; + int cpu; rcu_bootup_announce(); -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#if NUM_RCU_LVL_4 != 0 - printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); -#endif /* #if NUM_RCU_LVL_4 != 0 */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); @@ -1901,8 +1956,9 @@ void __init rcu_init(void) * or the scheduler are operational. */ cpu_notifier(rcu_cpu_notify, 0); - for_each_online_cpu(i) - rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); + for_each_online_cpu(cpu) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); + check_cpu_stall_init(); } #include "rcutree_plugin.h"