SAFE public projects git trees. - safe/jmp/linux-2.6/blob - kernel/rcutree_plugin.h

   1 /*
   2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
   3  * Internal non-public definitions that provide either classic
   4  * or preemptable semantics.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19  *
  20  * Copyright Red Hat, 2009
  21  * Copyright IBM Corporation, 2009
  22  *
  23  * Author: Ingo Molnar <mingo@elte.hu>
  24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25  */
  26
  27 #include <linux/delay.h>
  28
  29 #ifdef CONFIG_TREE_PREEMPT_RCU
  30
  31 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
  32 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  33
  34 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  35
  36 /*
  37  * Tell them what RCU they are running.
  38  */
  39 static void __init rcu_bootup_announce(void)
  40 {
  41         printk(KERN_INFO
  42                "Experimental preemptable hierarchical RCU implementation.\n");
  43 }
  44
  45 /*
  46  * Return the number of RCU-preempt batches processed thus far
  47  * for debug and statistics.
  48  */
  49 long rcu_batches_completed_preempt(void)
  50 {
  51         return rcu_preempt_state.completed;
  52 }
  53 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  54
  55 /*
  56  * Return the number of RCU batches processed thus far for debug & stats.
  57  */
  58 long rcu_batches_completed(void)
  59 {
  60         return rcu_batches_completed_preempt();
  61 }
  62 EXPORT_SYMBOL_GPL(rcu_batches_completed);
  63
  64 /*
  65  * Force a quiescent state for preemptible RCU.
  66  */
  67 void rcu_force_quiescent_state(void)
  68 {
  69         force_quiescent_state(&rcu_preempt_state, 0);
  70 }
  71 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  72
  73 /*
  74  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  75  * that this just means that the task currently running on the CPU is
  76  * not in a quiescent state.  There might be any number of tasks blocked
  77  * while in an RCU read-side critical section.
  78  */
  79 static void rcu_preempt_qs(int cpu)
  80 {
  81         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  82         rdp->passed_quiesc_completed = rdp->gpnum - 1;
  83         barrier();
  84         rdp->passed_quiesc = 1;
  85 }
  86
  87 /*
  88  * We have entered the scheduler, and the current task might soon be
  89  * context-switched away from.  If this task is in an RCU read-side
  90  * critical section, we will no longer be able to rely on the CPU to
  91  * record that fact, so we enqueue the task on the appropriate entry
  92  * of the blocked_tasks[] array.  The task will dequeue itself when
  93  * it exits the outermost enclosing RCU read-side critical section.
  94  * Therefore, the current grace period cannot be permitted to complete
  95  * until the blocked_tasks[] entry indexed by the low-order bit of
  96  * rnp->gpnum empties.
  97  *
  98  * Caller must disable preemption.
  99  */
 100 static void rcu_preempt_note_context_switch(int cpu)
 101 {
 102         struct task_struct *t = current;
 103         unsigned long flags;
 104         int phase;
 105         struct rcu_data *rdp;
 106         struct rcu_node *rnp;
 107
 108         if (t->rcu_read_lock_nesting &&
 109             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 110
 111                 /* Possibly blocking in an RCU read-side critical section. */
 112                 rdp = rcu_preempt_state.rda[cpu];
 113                 rnp = rdp->mynode;
 114                 raw_spin_lock_irqsave(&rnp->lock, flags);
 115                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 116                 t->rcu_blocked_node = rnp;
 117
 118                 /*
 119                  * If this CPU has already checked in, then this task
 120                  * will hold up the next grace period rather than the
 121                  * current grace period.  Queue the task accordingly.
 122                  * If the task is queued for the current grace period
 123                  * (i.e., this CPU has not yet passed through a quiescent
 124                  * state for the current grace period), then as long
 125                  * as that task remains queued, the current grace period
 126                  * cannot end.
 127                  *
 128                  * But first, note that the current CPU must still be
 129                  * on line!
 130                  */
 131                 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 132                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 133                 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 134                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 135                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 136         }
 137
 138         /*
 139          * Either we were not in an RCU read-side critical section to
 140          * begin with, or we have now recorded that critical section
 141          * globally.  Either way, we can now note a quiescent state
 142          * for this CPU.  Again, if we were in an RCU read-side critical
 143          * section, and if that critical section was blocking the current
 144          * grace period, then the fact that the task has been enqueued
 145          * means that we continue to block the current grace period.
 146          */
 147         rcu_preempt_qs(cpu);
 148         local_irq_save(flags);
 149         t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 150         local_irq_restore(flags);
 151 }
 152
 153 /*
 154  * Tree-preemptable RCU implementation for rcu_read_lock().
 155  * Just increment ->rcu_read_lock_nesting, shared state will be updated
 156  * if we block.
 157  */
 158 void __rcu_read_lock(void)
 159 {
 160         ACCESS_ONCE(current->rcu_read_lock_nesting)++;
 161         barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
 162 }
 163 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 164
 165 /*
 166  * Check for preempted RCU readers blocking the current grace period
 167  * for the specified rcu_node structure.  If the caller needs a reliable
 168  * answer, it must hold the rcu_node's ->lock.
 169  */
 170 static int rcu_preempted_readers(struct rcu_node *rnp)
 171 {
 172         int phase = rnp->gpnum & 0x1;
 173
 174         return !list_empty(&rnp->blocked_tasks[phase]) ||
 175                !list_empty(&rnp->blocked_tasks[phase + 2]);
 176 }
 177
 178 /*
 179  * Record a quiescent state for all tasks that were previously queued
 180  * on the specified rcu_node structure and that were blocking the current
 181  * RCU grace period.  The caller must hold the specified rnp->lock with
 182  * irqs disabled, and this lock is released upon return, but irqs remain
 183  * disabled.
 184  */
 185 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 186         __releases(rnp->lock)
 187 {
 188         unsigned long mask;
 189         struct rcu_node *rnp_p;
 190
 191         if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 192                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 193                 return;  /* Still need more quiescent states! */
 194         }
 195
 196         rnp_p = rnp->parent;
 197         if (rnp_p == NULL) {
 198                 /*
 199                  * Either there is only one rcu_node in the tree,
 200                  * or tasks were kicked up to root rcu_node due to
 201                  * CPUs going offline.
 202                  */
 203                 rcu_report_qs_rsp(&rcu_preempt_state, flags);
 204                 return;
 205         }
 206
 207         /* Report up the rest of the hierarchy. */
 208         mask = rnp->grpmask;
 209         raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
 210         raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
 211         rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
 212 }
 213
 214 /*
 215  * Handle special cases during rcu_read_unlock(), such as needing to
 216  * notify RCU core processing or task having blocked during the RCU
 217  * read-side critical section.
 218  */
 219 static void rcu_read_unlock_special(struct task_struct *t)
 220 {
 221         int empty;
 222         int empty_exp;
 223         unsigned long flags;
 224         struct rcu_node *rnp;
 225         int special;
 226
 227         /* NMI handlers cannot block and cannot safely manipulate state. */
 228         if (in_nmi())
 229                 return;
 230
 231         local_irq_save(flags);
 232
 233         /*
 234          * If RCU core is waiting for this CPU to exit critical section,
 235          * let it know that we have done so.
 236          */
 237         special = t->rcu_read_unlock_special;
 238         if (special & RCU_READ_UNLOCK_NEED_QS) {
 239                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 240                 rcu_preempt_qs(smp_processor_id());
 241         }
 242
 243         /* Hardware IRQ handlers cannot block. */
 244         if (in_irq()) {
 245                 local_irq_restore(flags);
 246                 return;
 247         }
 248
 249         /* Clean up if blocked during RCU read-side critical section. */
 250         if (special & RCU_READ_UNLOCK_BLOCKED) {
 251                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 252
 253                 /*
 254                  * Remove this task from the list it blocked on.  The
 255                  * task can migrate while we acquire the lock, but at
 256                  * most one time.  So at most two passes through loop.
 257                  */
 258                 for (;;) {
 259                         rnp = t->rcu_blocked_node;
 260                         raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 261                         if (rnp == t->rcu_blocked_node)
 262                                 break;
 263                         raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 264                 }
 265                 empty = !rcu_preempted_readers(rnp);
 266                 empty_exp = !rcu_preempted_readers_exp(rnp);
 267                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 268                 list_del_init(&t->rcu_node_entry);
 269                 t->rcu_blocked_node = NULL;
 270
 271                 /*
 272                  * If this was the last task on the current list, and if
 273                  * we aren't waiting on any CPUs, report the quiescent state.
 274                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 275                  */
 276                 if (empty)
 277                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 278                 else
 279                         rcu_report_unblock_qs_rnp(rnp, flags);
 280
 281                 /*
 282                  * If this was the last task on the expedited lists,
 283                  * then we need to report up the rcu_node hierarchy.
 284                  */
 285                 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
 286                         rcu_report_exp_rnp(&rcu_preempt_state, rnp);
 287         } else {
 288                 local_irq_restore(flags);
 289         }
 290 }
 291
 292 /*
 293  * Tree-preemptable RCU implementation for rcu_read_unlock().
 294  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 295  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 296  * invoke rcu_read_unlock_special() to clean up after a context switch
 297  * in an RCU read-side critical section and other special cases.
 298  */
 299 void __rcu_read_unlock(void)
 300 {
 301         struct task_struct *t = current;
 302
 303         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
 304         if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 305             unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 306                 rcu_read_unlock_special(t);
 307 #ifdef CONFIG_PROVE_LOCKING
 308         WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
 309 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 310 }
 311 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 312
 313 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 314
 315 /*
 316  * Scan the current list of tasks blocked within RCU read-side critical
 317  * sections, printing out the tid of each.
 318  */
 319 static void rcu_print_task_stall(struct rcu_node *rnp)
 320 {
 321         struct list_head *lp;
 322         int phase;
 323         struct task_struct *t;
 324
 325         if (rcu_preempted_readers(rnp)) {
 326                 phase = rnp->gpnum & 0x1;
 327                 lp = &rnp->blocked_tasks[phase];
 328                 list_for_each_entry(t, lp, rcu_node_entry)
 329                         printk(" P%d", t->pid);
 330         }
 331 }
 332
 333 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 334
 335 /*
 336  * Check that the list of blocked tasks for the newly completed grace
 337  * period is in fact empty.  It is a serious bug to complete a grace
 338  * period that still has RCU readers blocked!  This function must be
 339  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 340  * must be held by the caller.
 341  */
 342 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 343 {
 344         WARN_ON_ONCE(rcu_preempted_readers(rnp));
 345         WARN_ON_ONCE(rnp->qsmask);
 346 }
 347
 348 #ifdef CONFIG_HOTPLUG_CPU
 349
 350 /*
 351  * Handle tasklist migration for case in which all CPUs covered by the
 352  * specified rcu_node have gone offline.  Move them up to the root
 353  * rcu_node.  The reason for not just moving them to the immediate
 354  * parent is to remove the need for rcu_read_unlock_special() to
 355  * make more than two attempts to acquire the target rcu_node's lock.
 356  * Returns true if there were tasks blocking the current RCU grace
 357  * period.
 358  *
 359  * Returns 1 if there was previously a task blocking the current grace
 360  * period on the specified rcu_node structure.
 361  *
 362  * The caller must hold rnp->lock with irqs disabled.
 363  */
 364 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 365                                      struct rcu_node *rnp,
 366                                      struct rcu_data *rdp)
 367 {
 368         int i;
 369         struct list_head *lp;
 370         struct list_head *lp_root;
 371         int retval = 0;
 372         struct rcu_node *rnp_root = rcu_get_root(rsp);
 373         struct task_struct *tp;
 374
 375         if (rnp == rnp_root) {
 376                 WARN_ONCE(1, "Last CPU thought to be offlined?");
 377                 return 0;  /* Shouldn't happen: at least one CPU online. */
 378         }
 379         WARN_ON_ONCE(rnp != rdp->mynode &&
 380                      (!list_empty(&rnp->blocked_tasks[0]) ||
 381                       !list_empty(&rnp->blocked_tasks[1]) ||
 382                       !list_empty(&rnp->blocked_tasks[2]) ||
 383                       !list_empty(&rnp->blocked_tasks[3])));
 384
 385         /*
 386          * Move tasks up to root rcu_node.  Rely on the fact that the
 387          * root rcu_node can be at most one ahead of the rest of the
 388          * rcu_nodes in terms of gp_num value.  This fact allows us to
 389          * move the blocked_tasks[] array directly, element by element.
 390          */
 391         if (rcu_preempted_readers(rnp))
 392                 retval |= RCU_OFL_TASKS_NORM_GP;
 393         if (rcu_preempted_readers_exp(rnp))
 394                 retval |= RCU_OFL_TASKS_EXP_GP;
 395         for (i = 0; i < 4; i++) {
 396                 lp = &rnp->blocked_tasks[i];
 397                 lp_root = &rnp_root->blocked_tasks[i];
 398                 while (!list_empty(lp)) {
 399                         tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
 400                         raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 401                         list_del(&tp->rcu_node_entry);
 402                         tp->rcu_blocked_node = rnp_root;
 403                         list_add(&tp->rcu_node_entry, lp_root);
 404                         raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 405                 }
 406         }
 407         return retval;
 408 }
 409
 410 /*
 411  * Do CPU-offline processing for preemptable RCU.
 412  */
 413 static void rcu_preempt_offline_cpu(int cpu)
 414 {
 415         __rcu_offline_cpu(cpu, &rcu_preempt_state);
 416 }
 417
 418 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 419
 420 /*
 421  * Check for a quiescent state from the current CPU.  When a task blocks,
 422  * the task is recorded in the corresponding CPU's rcu_node structure,
 423  * which is checked elsewhere.
 424  *
 425  * Caller must disable hard irqs.
 426  */
 427 static void rcu_preempt_check_callbacks(int cpu)
 428 {
 429         struct task_struct *t = current;
 430
 431         if (t->rcu_read_lock_nesting == 0) {
 432                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 433                 rcu_preempt_qs(cpu);
 434                 return;
 435         }
 436         if (per_cpu(rcu_preempt_data, cpu).qs_pending)
 437                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 438 }
 439
 440 /*
 441  * Process callbacks for preemptable RCU.
 442  */
 443 static void rcu_preempt_process_callbacks(void)
 444 {
 445         __rcu_process_callbacks(&rcu_preempt_state,
 446                                 &__get_cpu_var(rcu_preempt_data));
 447 }
 448
 449 /*
 450  * Queue a preemptable-RCU callback for invocation after a grace period.
 451  */
 452 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 453 {
 454         __call_rcu(head, func, &rcu_preempt_state);
 455 }
 456 EXPORT_SYMBOL_GPL(call_rcu);
 457
 458 /**
 459  * synchronize_rcu - wait until a grace period has elapsed.
 460  *
 461  * Control will return to the caller some time after a full grace
 462  * period has elapsed, in other words after all currently executing RCU
 463  * read-side critical sections have completed.  RCU read-side critical
 464  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 465  * and may be nested.
 466  */
 467 void synchronize_rcu(void)
 468 {
 469         struct rcu_synchronize rcu;
 470
 471         if (!rcu_scheduler_active)
 472                 return;
 473
 474         init_completion(&rcu.completion);
 475         /* Will wake me after RCU finished. */
 476         call_rcu(&rcu.head, wakeme_after_rcu);
 477         /* Wait for it. */
 478         wait_for_completion(&rcu.completion);
 479 }
 480 EXPORT_SYMBOL_GPL(synchronize_rcu);
 481
 482 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
 483 static long sync_rcu_preempt_exp_count;
 484 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 485
 486 /*
 487  * Return non-zero if there are any tasks in RCU read-side critical
 488  * sections blocking the current preemptible-RCU expedited grace period.
 489  * If there is no preemptible-RCU expedited grace period currently in
 490  * progress, returns zero unconditionally.
 491  */
 492 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 493 {
 494         return !list_empty(&rnp->blocked_tasks[2]) ||
 495                !list_empty(&rnp->blocked_tasks[3]);
 496 }
 497
 498 /*
 499  * return non-zero if there is no RCU expedited grace period in progress
 500  * for the specified rcu_node structure, in other words, if all CPUs and
 501  * tasks covered by the specified rcu_node structure have done their bit
 502  * for the current expedited grace period.  Works only for preemptible
 503  * RCU -- other RCU implementation use other means.
 504  *
 505  * Caller must hold sync_rcu_preempt_exp_mutex.
 506  */
 507 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 508 {
 509         return !rcu_preempted_readers_exp(rnp) &&
 510                ACCESS_ONCE(rnp->expmask) == 0;
 511 }
 512
 513 /*
 514  * Report the exit from RCU read-side critical section for the last task
 515  * that queued itself during or before the current expedited preemptible-RCU
 516  * grace period.  This event is reported either to the rcu_node structure on
 517  * which the task was queued or to one of that rcu_node structure's ancestors,
 518  * recursively up the tree.  (Calm down, calm down, we do the recursion
 519  * iteratively!)
 520  *
 521  * Caller must hold sync_rcu_preempt_exp_mutex.
 522  */
 523 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 524 {
 525         unsigned long flags;
 526         unsigned long mask;
 527
 528         raw_spin_lock_irqsave(&rnp->lock, flags);
 529         for (;;) {
 530                 if (!sync_rcu_preempt_exp_done(rnp))
 531                         break;
 532                 if (rnp->parent == NULL) {
 533                         wake_up(&sync_rcu_preempt_exp_wq);
 534                         break;
 535                 }
 536                 mask = rnp->grpmask;
 537                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 538                 rnp = rnp->parent;
 539                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
 540                 rnp->expmask &= ~mask;
 541         }
 542         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 543 }
 544
 545 /*
 546  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 547  * grace period for the specified rcu_node structure.  If there are no such
 548  * tasks, report it up the rcu_node hierarchy.
 549  *
 550  * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 551  */
 552 static void
 553 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 554 {
 555         int must_wait;
 556
 557         raw_spin_lock(&rnp->lock); /* irqs already disabled */
 558         list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
 559         list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
 560         must_wait = rcu_preempted_readers_exp(rnp);
 561         raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 562         if (!must_wait)
 563                 rcu_report_exp_rnp(rsp, rnp);
 564 }
 565
 566 /*
 567  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 568  * is to invoke synchronize_sched_expedited() to push all the tasks to
 569  * the ->blocked_tasks[] lists, move all entries from the first set of
 570  * ->blocked_tasks[] lists to the second set, and finally wait for this
 571  * second set to drain.
 572  */
 573 void synchronize_rcu_expedited(void)
 574 {
 575         unsigned long flags;
 576         struct rcu_node *rnp;
 577         struct rcu_state *rsp = &rcu_preempt_state;
 578         long snap;
 579         int trycount = 0;
 580
 581         smp_mb(); /* Caller's modifications seen first by other CPUs. */
 582         snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
 583         smp_mb(); /* Above access cannot bleed into critical section. */
 584
 585         /*
 586          * Acquire lock, falling back to synchronize_rcu() if too many
 587          * lock-acquisition failures.  Of course, if someone does the
 588          * expedited grace period for us, just leave.
 589          */
 590         while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
 591                 if (trycount++ < 10)
 592                         udelay(trycount * num_online_cpus());
 593                 else {
 594                         synchronize_rcu();
 595                         return;
 596                 }
 597                 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 598                         goto mb_ret; /* Others did our work for us. */
 599         }
 600         if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 601                 goto unlock_mb_ret; /* Others did our work for us. */
 602
 603         /* force all RCU readers onto blocked_tasks[]. */
 604         synchronize_sched_expedited();
 605
 606         raw_spin_lock_irqsave(&rsp->onofflock, flags);
 607
 608         /* Initialize ->expmask for all non-leaf rcu_node structures. */
 609         rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
 610                 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
 611                 rnp->expmask = rnp->qsmaskinit;
 612                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 613         }
 614
 615         /* Snapshot current state of ->blocked_tasks[] lists. */
 616         rcu_for_each_leaf_node(rsp, rnp)
 617                 sync_rcu_preempt_exp_init(rsp, rnp);
 618         if (NUM_RCU_NODES > 1)
 619                 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 620
 621         raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 622
 623         /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
 624         rnp = rcu_get_root(rsp);
 625         wait_event(sync_rcu_preempt_exp_wq,
 626                    sync_rcu_preempt_exp_done(rnp));
 627
 628         /* Clean up and exit. */
 629         smp_mb(); /* ensure expedited GP seen before counter increment. */
 630         ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
 631 unlock_mb_ret:
 632         mutex_unlock(&sync_rcu_preempt_exp_mutex);
 633 mb_ret:
 634         smp_mb(); /* ensure subsequent action seen after grace period. */
 635 }
 636 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 637
 638 /*
 639  * Check to see if there is any immediate preemptable-RCU-related work
 640  * to be done.
 641  */
 642 static int rcu_preempt_pending(int cpu)
 643 {
 644         return __rcu_pending(&rcu_preempt_state,
 645                              &per_cpu(rcu_preempt_data, cpu));
 646 }
 647
 648 /*
 649  * Does preemptable RCU need the CPU to stay out of dynticks mode?
 650  */
 651 static int rcu_preempt_needs_cpu(int cpu)
 652 {
 653         return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 654 }
 655
 656 /**
 657  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 658  */
 659 void rcu_barrier(void)
 660 {
 661         _rcu_barrier(&rcu_preempt_state, call_rcu);
 662 }
 663 EXPORT_SYMBOL_GPL(rcu_barrier);
 664
 665 /*
 666  * Initialize preemptable RCU's per-CPU data.
 667  */
 668 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 669 {
 670         rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
 671 }
 672
 673 /*
 674  * Move preemptable RCU's callbacks to ->orphan_cbs_list.
 675  */
 676 static void rcu_preempt_send_cbs_to_orphanage(void)
 677 {
 678         rcu_send_cbs_to_orphanage(&rcu_preempt_state);
 679 }
 680
 681 /*
 682  * Initialize preemptable RCU's state structures.
 683  */
 684 static void __init __rcu_init_preempt(void)
 685 {
 686         RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
 687 }
 688
 689 /*
 690  * Check for a task exiting while in a preemptable-RCU read-side
 691  * critical section, clean up if so.  No need to issue warnings,
 692  * as debug_check_no_locks_held() already does this if lockdep
 693  * is enabled.
 694  */
 695 void exit_rcu(void)
 696 {
 697         struct task_struct *t = current;
 698
 699         if (t->rcu_read_lock_nesting == 0)
 700                 return;
 701         t->rcu_read_lock_nesting = 1;
 702         rcu_read_unlock();
 703 }
 704
 705 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 706
 707 /*
 708  * Tell them what RCU they are running.
 709  */
 710 static void __init rcu_bootup_announce(void)
 711 {
 712         printk(KERN_INFO "Hierarchical RCU implementation.\n");
 713 }
 714
 715 /*
 716  * Return the number of RCU batches processed thus far for debug & stats.
 717  */
 718 long rcu_batches_completed(void)
 719 {
 720         return rcu_batches_completed_sched();
 721 }
 722 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 723
 724 /*
 725  * Force a quiescent state for RCU, which, because there is no preemptible
 726  * RCU, becomes the same as rcu-sched.
 727  */
 728 void rcu_force_quiescent_state(void)
 729 {
 730         rcu_sched_force_quiescent_state();
 731 }
 732 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 733
 734 /*
 735  * Because preemptable RCU does not exist, we never have to check for
 736  * CPUs being in quiescent states.
 737  */
 738 static void rcu_preempt_note_context_switch(int cpu)
 739 {
 740 }
 741
 742 /*
 743  * Because preemptable RCU does not exist, there are never any preempted
 744  * RCU readers.
 745  */
 746 static int rcu_preempted_readers(struct rcu_node *rnp)
 747 {
 748         return 0;
 749 }
 750
 751 #ifdef CONFIG_HOTPLUG_CPU
 752
 753 /* Because preemptible RCU does not exist, no quieting of tasks. */
 754 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 755 {
 756         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 757 }
 758
 759 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 760
 761 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 762
 763 /*
 764  * Because preemptable RCU does not exist, we never have to check for
 765  * tasks blocked within RCU read-side critical sections.
 766  */
 767 static void rcu_print_task_stall(struct rcu_node *rnp)
 768 {
 769 }
 770
 771 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 772
 773 /*
 774  * Because there is no preemptable RCU, there can be no readers blocked,
 775  * so there is no need to check for blocked tasks.  So check only for
 776  * bogus qsmask values.
 777  */
 778 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 779 {
 780         WARN_ON_ONCE(rnp->qsmask);
 781 }
 782
 783 #ifdef CONFIG_HOTPLUG_CPU
 784
 785 /*
 786  * Because preemptable RCU does not exist, it never needs to migrate
 787  * tasks that were blocked within RCU read-side critical sections, and
 788  * such non-existent tasks cannot possibly have been blocking the current
 789  * grace period.
 790  */
 791 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 792                                      struct rcu_node *rnp,
 793                                      struct rcu_data *rdp)
 794 {
 795         return 0;
 796 }
 797
 798 /*
 799  * Because preemptable RCU does not exist, it never needs CPU-offline
 800  * processing.
 801  */
 802 static void rcu_preempt_offline_cpu(int cpu)
 803 {
 804 }
 805
 806 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 807
 808 /*
 809  * Because preemptable RCU does not exist, it never has any callbacks
 810  * to check.
 811  */
 812 static void rcu_preempt_check_callbacks(int cpu)
 813 {
 814 }
 815
 816 /*
 817  * Because preemptable RCU does not exist, it never has any callbacks
 818  * to process.
 819  */
 820 static void rcu_preempt_process_callbacks(void)
 821 {
 822 }
 823
 824 /*
 825  * In classic RCU, call_rcu() is just call_rcu_sched().
 826  */
 827 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 828 {
 829         call_rcu_sched(head, func);
 830 }
 831 EXPORT_SYMBOL_GPL(call_rcu);
 832
 833 /*
 834  * Wait for an rcu-preempt grace period, but make it happen quickly.
 835  * But because preemptable RCU does not exist, map to rcu-sched.
 836  */
 837 void synchronize_rcu_expedited(void)
 838 {
 839         synchronize_sched_expedited();
 840 }
 841 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 842
 843 #ifdef CONFIG_HOTPLUG_CPU
 844
 845 /*
 846  * Because preemptable RCU does not exist, there is never any need to
 847  * report on tasks preempted in RCU read-side critical sections during
 848  * expedited RCU grace periods.
 849  */
 850 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 851 {
 852         return;
 853 }
 854
 855 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 856
 857 /*
 858  * Because preemptable RCU does not exist, it never has any work to do.
 859  */
 860 static int rcu_preempt_pending(int cpu)
 861 {
 862         return 0;
 863 }
 864
 865 /*
 866  * Because preemptable RCU does not exist, it never needs any CPU.
 867  */
 868 static int rcu_preempt_needs_cpu(int cpu)
 869 {
 870         return 0;
 871 }
 872
 873 /*
 874  * Because preemptable RCU does not exist, rcu_barrier() is just
 875  * another name for rcu_barrier_sched().
 876  */
 877 void rcu_barrier(void)
 878 {
 879         rcu_barrier_sched();
 880 }
 881 EXPORT_SYMBOL_GPL(rcu_barrier);
 882
 883 /*
 884  * Because preemptable RCU does not exist, there is no per-CPU
 885  * data to initialize.
 886  */
 887 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 888 {
 889 }
 890
 891 /*
 892  * Because there is no preemptable RCU, there are no callbacks to move.
 893  */
 894 static void rcu_preempt_send_cbs_to_orphanage(void)
 895 {
 896 }
 897
 898 /*
 899  * Because preemptable RCU does not exist, it need not be initialized.
 900  */
 901 static void __init __rcu_init_preempt(void)
 902 {
 903 }
 904
 905 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 906
 907 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 908
 909 /*
 910  * Check to see if any future RCU-related work will need to be done
 911  * by the current CPU, even if none need be done immediately, returning
 912  * 1 if so.  This function is part of the RCU implementation; it is -not-
 913  * an exported member of the RCU API.
 914  *
 915  * Because we have preemptible RCU, just check whether this CPU needs
 916  * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
 917  * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
 918  */
 919 int rcu_needs_cpu(int cpu)
 920 {
 921         return rcu_needs_cpu_quick_check(cpu);
 922 }
 923
 924 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 925
 926 #define RCU_NEEDS_CPU_FLUSHES 5
 927
 928 /*
 929  * Check to see if any future RCU-related work will need to be done
 930  * by the current CPU, even if none need be done immediately, returning
 931  * 1 if so.  This function is part of the RCU implementation; it is -not-
 932  * an exported member of the RCU API.
 933  *
 934  * Because we are not supporting preemptible RCU, attempt to accelerate
 935  * any current grace periods so that RCU no longer needs this CPU, but
 936  * only if all other CPUs are already in dynticks-idle mode.  This will
 937  * allow the CPU cores to be powered down immediately, as opposed to after
 938  * waiting many milliseconds for grace periods to elapse.
 939  */
 940 int rcu_needs_cpu(int cpu)
 941 {
 942         int c = 1;
 943         int i;
 944         int thatcpu;
 945
 946         /* Don't bother unless we are the last non-dyntick-idle CPU. */
 947         for_each_cpu_not(thatcpu, nohz_cpu_mask)
 948                 if (thatcpu != cpu)
 949                         return rcu_needs_cpu_quick_check(cpu);
 950
 951         /* Try to push remaining RCU-sched and RCU-bh callbacks through. */
 952         for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) {
 953                 c = 0;
 954                 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
 955                         rcu_sched_qs(cpu);
 956                         force_quiescent_state(&rcu_sched_state, 0);
 957                         __rcu_process_callbacks(&rcu_sched_state,
 958                                                 &per_cpu(rcu_sched_data, cpu));
 959                         c = !!per_cpu(rcu_sched_data, cpu).nxtlist;
 960                 }
 961                 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
 962                         rcu_bh_qs(cpu);
 963                         force_quiescent_state(&rcu_bh_state, 0);
 964                         __rcu_process_callbacks(&rcu_bh_state,
 965                                                 &per_cpu(rcu_bh_data, cpu));
 966                         c = !!per_cpu(rcu_bh_data, cpu).nxtlist;
 967                 }
 968         }
 969
 970         /* If RCU callbacks are still pending, RCU still needs this CPU. */
 971         return c;
 972 }
 973
 974 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */