X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Frcupdate.c;h=13458bbaa1be2bbac965b9584709152c9291cc46;hb=894b5779ceeabdce139068310e58bcf51ed9bb22;hp=0a669bd2f6d106724dafef8b398acf10ed5d01ae;hpb=e56d090310d7625ecb43a1eeebd479f04affb48b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0a669bd..13458bb 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -46,45 +46,64 @@ #include #include #include -#include #include +#include /* Definition for rcupdate control block. */ -struct rcu_ctrlblk rcu_ctrlblk = - { .cur = -300, .completed = -300 }; -struct rcu_ctrlblk rcu_bh_ctrlblk = - { .cur = -300, .completed = -300 }; - -/* Bookkeeping of the progress of the grace period */ -struct rcu_state { - spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */ - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, +}; +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, }; - -static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; -static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10000; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; +#ifdef CONFIG_SMP +static int rsinterval = 1000; +#endif -#ifndef __HAVE_ARCH_CMPXCHG -/* - * We use an array of spinlocks for the rcurefs -- similar to ones in sparc - * 32 bit atomic_t implementations, and a hash function similar to that - * for our refcounting needs. - * Can't help multiprocessors which donot have cmpxchg :( - */ +static atomic_t rcu_barrier_cpu_count; +static DEFINE_MUTEX(rcu_barrier_mutex); +static struct completion rcu_barrier_completion; -spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { - [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED -}; +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { + rdp->last_rs_qlen = rdp->qlen; + /* + * Don't send IPI to itself. With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} #endif /** @@ -110,17 +129,13 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - - if (unlikely(++rdp->count > 10000)) - set_need_resched(); - + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } -static atomic_t rcu_barrier_cpu_count; -static struct semaphore rcu_barrier_sema; -static struct completion rcu_barrier_completion; - /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -149,12 +164,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - rdp->count++; -/* - * Should we directly call rcu_do_batch() here ? - * if (unlikely(rdp->count > 10000)) - * rcu_do_batch(rdp); - */ + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } @@ -193,13 +208,13 @@ static void rcu_barrier_func(void *notused) void rcu_barrier(void) { BUG_ON(in_interrupt()); - /* Take cpucontrol semaphore to protect against CPU hotplug */ - down(&rcu_barrier_sema); + /* Take cpucontrol mutex to protect against CPU hotplug */ + mutex_lock(&rcu_barrier_mutex); init_completion(&rcu_barrier_completion); atomic_set(&rcu_barrier_cpu_count, 0); on_each_cpu(rcu_barrier_func, NULL, 0, 1); wait_for_completion(&rcu_barrier_completion); - up(&rcu_barrier_sema); + mutex_unlock(&rcu_barrier_mutex); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -217,10 +232,12 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; - rdp->count--; - if (++count >= maxbatch) + rdp->qlen--; + if (++count >= rdp->blimit) break; } + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; if (!rdp->donelist) rdp->donetail = &rdp->donelist; else @@ -234,13 +251,13 @@ static void rcu_do_batch(struct rcu_data *rdp) * This is done by rcu_start_batch. The start is not broadcasted to * all cpus, they must pick this up by comparing rcp->cur with * rdp->quiescbatch. All cpus are recorded in the - * rcu_state.cpumask bitmap. + * rcu_ctrlblk.cpumask bitmap. * - All cpus must go through a quiescent state. * Since the start of the grace period is not broadcasted, at least two * calls to rcu_check_quiescent_state are required: * The first call just notices that a new grace period is running. The * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_state.cpumask. If + * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If * the bitmap is empty, then the grace period is completed. * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace * period (if necessary). @@ -248,14 +265,10 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Register a new batch of callbacks, and start it up if there is currently no * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_state.lock. + * Caller must hold rcu_ctrlblk.lock. */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, - int next_pending) +static void rcu_start_batch(struct rcu_ctrlblk *rcp) { - if (next_pending) - rcp->next_pending = 1; - if (rcp->next_pending && rcp->completed == rcp->cur) { rcp->next_pending = 0; @@ -269,11 +282,11 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, /* * Accessing nohz_cpu_mask before incrementing rcp->cur needs a * Barrier Otherwise it can cause tickless idle CPUs to be - * included in rsp->cpumask, which will extend graceperiods + * included in rcp->cpumask, which will extend graceperiods * unnecessarily. */ smp_mb(); - cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); } } @@ -283,13 +296,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, * Clear it from the cpu mask and complete the grace period if it was the last * cpu. Start another grace period if someone has further entries pending */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) +static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) { - cpu_clear(cpu, rsp->cpumask); - if (cpus_empty(rsp->cpumask)) { + cpu_clear(cpu, rcp->cpumask); + if (cpus_empty(rcp->cpumask)) { /* batch completed ! */ rcp->completed = rcp->cur; - rcu_start_batch(rcp, rsp, 0); + rcu_start_batch(rcp); } } @@ -299,7 +312,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) * quiescent cycle, then indicate that it has done so. */ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->quiescbatch != rcp->cur) { /* start new grace period: */ @@ -324,15 +337,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, return; rdp->qs_pending = 0; - spin_lock(&rsp->lock); + spin_lock(&rcp->lock); /* * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync * during cpu startup. Ignore the quiescent state. */ if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp, rsp); + cpu_quiet(rdp->cpu, rcp); - spin_unlock(&rsp->lock); + spin_unlock(&rcp->lock); } @@ -353,28 +366,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, } static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { /* if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ - spin_lock_bh(&rsp->lock); + spin_lock_bh(&rcp->lock); if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp, rsp); - spin_unlock_bh(&rsp->lock); + cpu_quiet(rdp->cpu, rcp); + spin_unlock_bh(&rcp->lock); rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); - + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); } + static void rcu_offline_cpu(int cpu) { struct rcu_data *this_rdp = &get_cpu_var(rcu_data); struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state, + __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state, + __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); put_cpu_var(rcu_data); put_cpu_var(rcu_bh_data); @@ -393,7 +407,7 @@ static void rcu_offline_cpu(int cpu) * This does the RCU processing work from tasklet context. */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { *rdp->donetail = rdp->curlist; @@ -402,8 +416,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, rdp->curtail = &rdp->curlist; } - local_irq_disable(); if (rdp->nxtlist && !rdp->curlist) { + local_irq_disable(); rdp->curlist = rdp->nxtlist; rdp->curtail = rdp->nxttail; rdp->nxtlist = NULL; @@ -423,24 +437,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, if (!rcp->next_pending) { /* and start it/schedule start if it's a new batch */ - spin_lock(&rsp->lock); - rcu_start_batch(rcp, rsp, 1); - spin_unlock(&rsp->lock); + spin_lock(&rcp->lock); + rcp->next_pending = 1; + rcu_start_batch(rcp); + spin_unlock(&rcp->lock); } - } else { - local_irq_enable(); } - rcu_check_quiescent_state(rcp, rsp, rdp); + + rcu_check_quiescent_state(rcp, rdp); if (rdp->donelist) rcu_do_batch(rdp); } static void rcu_process_callbacks(unsigned long unused) { - __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state, - &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state, - &__get_cpu_var(rcu_bh_data)); + __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); +} + +static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) +{ + /* This cpu has pending rcu entries and the grace period + * for them has completed. + */ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) + return 1; + + /* This cpu has no pending entries, but there are new entries */ + if (!rdp->curlist && rdp->nxtlist) + return 1; + + /* This cpu has finished callbacks to invoke */ + if (rdp->donelist) + return 1; + + /* The rcu core waits for a quiescent state from the cpu */ + if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) + return 1; + + /* nothing to do */ + return 0; +} + +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); } void rcu_check_callbacks(int cpu, int user) @@ -465,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; + rdp->blimit = blimit; } static void __devinit rcu_online_cpu(int cpu) @@ -506,7 +549,6 @@ static struct notifier_block __devinitdata rcu_nb = { */ void __init rcu_init(void) { - sema_init(&rcu_barrier_sema, 1); rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)smp_processor_id()); /* Register notifier for non-boot CPUs */ @@ -559,9 +601,14 @@ void synchronize_kernel(void) synchronize_rcu(); } -module_param(maxbatch, int, 0); +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); +#ifdef CONFIG_SMP +module_param(rsinterval, int, 0); +#endif EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ -EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL_GPL(synchronize_rcu); -EXPORT_SYMBOL(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */