SAFE public projects git trees. - safe/jmp/linux-2.6/blob - kernel/rcutree_plugin.h

   1 /*
   2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
   3  * Internal non-public definitions that provide either classic
   4  * or preemptable semantics.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19  *
  20  * Copyright Red Hat, 2009
  21  * Copyright IBM Corporation, 2009
  22  *
  23  * Author: Ingo Molnar <mingo@elte.hu>
  24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25  */
  26
  27 #include <linux/delay.h>
  28
  29 #ifdef CONFIG_TREE_PREEMPT_RCU
  30
  31 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
  32 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  33
  34 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  35
  36 /*
  37  * Tell them what RCU they are running.
  38  */
  39 static void __init rcu_bootup_announce(void)
  40 {
  41         printk(KERN_INFO
  42                "Experimental preemptable hierarchical RCU implementation.\n");
  43 }
  44
  45 /*
  46  * Return the number of RCU-preempt batches processed thus far
  47  * for debug and statistics.
  48  */
  49 long rcu_batches_completed_preempt(void)
  50 {
  51         return rcu_preempt_state.completed;
  52 }
  53 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  54
  55 /*
  56  * Return the number of RCU batches processed thus far for debug & stats.
  57  */
  58 long rcu_batches_completed(void)
  59 {
  60         return rcu_batches_completed_preempt();
  61 }
  62 EXPORT_SYMBOL_GPL(rcu_batches_completed);
  63
  64 /*
  65  * Force a quiescent state for preemptible RCU.
  66  */
  67 void rcu_force_quiescent_state(void)
  68 {
  69         force_quiescent_state(&rcu_preempt_state, 0);
  70 }
  71 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  72
  73 /*
  74  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  75  * that this just means that the task currently running on the CPU is
  76  * not in a quiescent state.  There might be any number of tasks blocked
  77  * while in an RCU read-side critical section.
  78  */
  79 static void rcu_preempt_qs(int cpu)
  80 {
  81         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  82         rdp->passed_quiesc_completed = rdp->gpnum - 1;
  83         barrier();
  84         rdp->passed_quiesc = 1;
  85 }
  86
  87 /*
  88  * We have entered the scheduler, and the current task might soon be
  89  * context-switched away from.  If this task is in an RCU read-side
  90  * critical section, we will no longer be able to rely on the CPU to
  91  * record that fact, so we enqueue the task on the appropriate entry
  92  * of the blocked_tasks[] array.  The task will dequeue itself when
  93  * it exits the outermost enclosing RCU read-side critical section.
  94  * Therefore, the current grace period cannot be permitted to complete
  95  * until the blocked_tasks[] entry indexed by the low-order bit of
  96  * rnp->gpnum empties.
  97  *
  98  * Caller must disable preemption.
  99  */
 100 static void rcu_preempt_note_context_switch(int cpu)
 101 {
 102         struct task_struct *t = current;
 103         unsigned long flags;
 104         int phase;
 105         struct rcu_data *rdp;
 106         struct rcu_node *rnp;
 107
 108         if (t->rcu_read_lock_nesting &&
 109             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 110
 111                 /* Possibly blocking in an RCU read-side critical section. */
 112                 rdp = rcu_preempt_state.rda[cpu];
 113                 rnp = rdp->mynode;
 114                 raw_spin_lock_irqsave(&rnp->lock, flags);
 115                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 116                 t->rcu_blocked_node = rnp;
 117
 118                 /*
 119                  * If this CPU has already checked in, then this task
 120                  * will hold up the next grace period rather than the
 121                  * current grace period.  Queue the task accordingly.
 122                  * If the task is queued for the current grace period
 123                  * (i.e., this CPU has not yet passed through a quiescent
 124                  * state for the current grace period), then as long
 125                  * as that task remains queued, the current grace period
 126                  * cannot end.
 127                  *
 128                  * But first, note that the current CPU must still be
 129                  * on line!
 130                  */
 131                 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 132                 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 133                 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 134                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 135                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 136         }
 137
 138         /*
 139          * Either we were not in an RCU read-side critical section to
 140          * begin with, or we have now recorded that critical section
 141          * globally.  Either way, we can now note a quiescent state
 142          * for this CPU.  Again, if we were in an RCU read-side critical
 143          * section, and if that critical section was blocking the current
 144          * grace period, then the fact that the task has been enqueued
 145          * means that we continue to block the current grace period.
 146          */
 147         rcu_preempt_qs(cpu);
 148         local_irq_save(flags);
 149         t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 150         local_irq_restore(flags);
 151 }
 152
 153 /*
 154  * Tree-preemptable RCU implementation for rcu_read_lock().
 155  * Just increment ->rcu_read_lock_nesting, shared state will be updated
 156  * if we block.
 157  */
 158 void __rcu_read_lock(void)
 159 {
 160         ACCESS_ONCE(current->rcu_read_lock_nesting)++;
 161         barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
 162 }
 163 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 164
 165 /*
 166  * Check for preempted RCU readers blocking the current grace period
 167  * for the specified rcu_node structure.  If the caller needs a reliable
 168  * answer, it must hold the rcu_node's ->lock.
 169  */
 170 static int rcu_preempted_readers(struct rcu_node *rnp)
 171 {
 172         int phase = rnp->gpnum & 0x1;
 173
 174         return !list_empty(&rnp->blocked_tasks[phase]) ||
 175                !list_empty(&rnp->blocked_tasks[phase + 2]);
 176 }
 177
 178 /*
 179  * Record a quiescent state for all tasks that were previously queued
 180  * on the specified rcu_node structure and that were blocking the current
 181  * RCU grace period.  The caller must hold the specified rnp->lock with
 182  * irqs disabled, and this lock is released upon return, but irqs remain
 183  * disabled.
 184  */
 185 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 186         __releases(rnp->lock)
 187 {
 188         unsigned long mask;
 189         struct rcu_node *rnp_p;
 190
 191         if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
 192                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 193                 return;  /* Still need more quiescent states! */
 194         }
 195
 196         rnp_p = rnp->parent;
 197         if (rnp_p == NULL) {
 198                 /*
 199                  * Either there is only one rcu_node in the tree,
 200                  * or tasks were kicked up to root rcu_node due to
 201                  * CPUs going offline.
 202                  */
 203                 rcu_report_qs_rsp(&rcu_preempt_state, flags);
 204                 return;
 205         }
 206
 207         /* Report up the rest of the hierarchy. */
 208         mask = rnp->grpmask;
 209         raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
 210         raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
 211         rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
 212 }
 213
 214 /*
 215  * Handle special cases during rcu_read_unlock(), such as needing to
 216  * notify RCU core processing or task having blocked during the RCU
 217  * read-side critical section.
 218  */
 219 static void rcu_read_unlock_special(struct task_struct *t)
 220 {
 221         int empty;
 222         int empty_exp;
 223         unsigned long flags;
 224         struct rcu_node *rnp;
 225         int special;
 226
 227         /* NMI handlers cannot block and cannot safely manipulate state. */
 228         if (in_nmi())
 229                 return;
 230
 231         local_irq_save(flags);
 232
 233         /*
 234          * If RCU core is waiting for this CPU to exit critical section,
 235          * let it know that we have done so.
 236          */
 237         special = t->rcu_read_unlock_special;
 238         if (special & RCU_READ_UNLOCK_NEED_QS) {
 239                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 240                 rcu_preempt_qs(smp_processor_id());
 241         }
 242
 243         /* Hardware IRQ handlers cannot block. */
 244         if (in_irq()) {
 245                 local_irq_restore(flags);
 246                 return;
 247         }
 248
 249         /* Clean up if blocked during RCU read-side critical section. */
 250         if (special & RCU_READ_UNLOCK_BLOCKED) {
 251                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 252
 253                 /*
 254                  * Remove this task from the list it blocked on.  The
 255                  * task can migrate while we acquire the lock, but at
 256                  * most one time.  So at most two passes through loop.
 257                  */
 258                 for (;;) {
 259                         rnp = t->rcu_blocked_node;
 260                         raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 261                         if (rnp == t->rcu_blocked_node)
 262                                 break;
 263                         raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 264                 }
 265                 empty = !rcu_preempted_readers(rnp);
 266                 empty_exp = !rcu_preempted_readers_exp(rnp);
 267                 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 268                 list_del_init(&t->rcu_node_entry);
 269                 t->rcu_blocked_node = NULL;
 270
 271                 /*
 272                  * If this was the last task on the current list, and if
 273                  * we aren't waiting on any CPUs, report the quiescent state.
 274                  * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 275                  */
 276                 if (empty)
 277                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 278                 else
 279                         rcu_report_unblock_qs_rnp(rnp, flags);
 280
 281                 /*
 282                  * If this was the last task on the expedited lists,
 283                  * then we need to report up the rcu_node hierarchy.
 284                  */
 285                 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
 286                         rcu_report_exp_rnp(&rcu_preempt_state, rnp);
 287         } else {
 288                 local_irq_restore(flags);
 289         }
 290 }
 291
 292 /*
 293  * Tree-preemptable RCU implementation for rcu_read_unlock().
 294  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 295  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 296  * invoke rcu_read_unlock_special() to clean up after a context switch
 297  * in an RCU read-side critical section and other special cases.
 298  */
 299 void __rcu_read_unlock(void)
 300 {
 301         struct task_struct *t = current;
 302
 303         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
 304         if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 305             unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 306                 rcu_read_unlock_special(t);
 307 #ifdef CONFIG_PROVE_LOCKING
 308         WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
 309 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 310 }
 311 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 312
 313 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 314
 315 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 316
 317 /*
 318  * Dump detailed information for all tasks blocking the current RCU
 319  * grace period on the specified rcu_node structure.
 320  */
 321 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 322 {
 323         unsigned long flags;
 324         struct list_head *lp;
 325         int phase;
 326         struct task_struct *t;
 327
 328         if (rcu_preempted_readers(rnp)) {
 329                 raw_spin_lock_irqsave(&rnp->lock, flags);
 330                 phase = rnp->gpnum & 0x1;
 331                 lp = &rnp->blocked_tasks[phase];
 332                 list_for_each_entry(t, lp, rcu_node_entry)
 333                         sched_show_task(t);
 334                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
 335         }
 336 }
 337
 338 /*
 339  * Dump detailed information for all tasks blocking the current RCU
 340  * grace period.
 341  */
 342 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 343 {
 344         struct rcu_node *rnp = rcu_get_root(rsp);
 345
 346         rcu_print_detail_task_stall_rnp(rnp);
 347         rcu_for_each_leaf_node(rsp, rnp)
 348                 rcu_print_detail_task_stall_rnp(rnp);
 349 }
 350
 351 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 352
 353 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 354 {
 355 }
 356
 357 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
 358
 359 /*
 360  * Scan the current list of tasks blocked within RCU read-side critical
 361  * sections, printing out the tid of each.
 362  */
 363 static void rcu_print_task_stall(struct rcu_node *rnp)
 364 {
 365         struct list_head *lp;
 366         int phase;
 367         struct task_struct *t;
 368
 369         if (rcu_preempted_readers(rnp)) {
 370                 phase = rnp->gpnum & 0x1;
 371                 lp = &rnp->blocked_tasks[phase];
 372                 list_for_each_entry(t, lp, rcu_node_entry)
 373                         printk(" P%d", t->pid);
 374         }
 375 }
 376
 377 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 378
 379 /*
 380  * Check that the list of blocked tasks for the newly completed grace
 381  * period is in fact empty.  It is a serious bug to complete a grace
 382  * period that still has RCU readers blocked!  This function must be
 383  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 384  * must be held by the caller.
 385  */
 386 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 387 {
 388         WARN_ON_ONCE(rcu_preempted_readers(rnp));
 389         WARN_ON_ONCE(rnp->qsmask);
 390 }
 391
 392 #ifdef CONFIG_HOTPLUG_CPU
 393
 394 /*
 395  * Handle tasklist migration for case in which all CPUs covered by the
 396  * specified rcu_node have gone offline.  Move them up to the root
 397  * rcu_node.  The reason for not just moving them to the immediate
 398  * parent is to remove the need for rcu_read_unlock_special() to
 399  * make more than two attempts to acquire the target rcu_node's lock.
 400  * Returns true if there were tasks blocking the current RCU grace
 401  * period.
 402  *
 403  * Returns 1 if there was previously a task blocking the current grace
 404  * period on the specified rcu_node structure.
 405  *
 406  * The caller must hold rnp->lock with irqs disabled.
 407  */
 408 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 409                                      struct rcu_node *rnp,
 410                                      struct rcu_data *rdp)
 411 {
 412         int i;
 413         struct list_head *lp;
 414         struct list_head *lp_root;
 415         int retval = 0;
 416         struct rcu_node *rnp_root = rcu_get_root(rsp);
 417         struct task_struct *tp;
 418
 419         if (rnp == rnp_root) {
 420                 WARN_ONCE(1, "Last CPU thought to be offlined?");
 421                 return 0;  /* Shouldn't happen: at least one CPU online. */
 422         }
 423         WARN_ON_ONCE(rnp != rdp->mynode &&
 424                      (!list_empty(&rnp->blocked_tasks[0]) ||
 425                       !list_empty(&rnp->blocked_tasks[1]) ||
 426                       !list_empty(&rnp->blocked_tasks[2]) ||
 427                       !list_empty(&rnp->blocked_tasks[3])));
 428
 429         /*
 430          * Move tasks up to root rcu_node.  Rely on the fact that the
 431          * root rcu_node can be at most one ahead of the rest of the
 432          * rcu_nodes in terms of gp_num value.  This fact allows us to
 433          * move the blocked_tasks[] array directly, element by element.
 434          */
 435         if (rcu_preempted_readers(rnp))
 436                 retval |= RCU_OFL_TASKS_NORM_GP;
 437         if (rcu_preempted_readers_exp(rnp))
 438                 retval |= RCU_OFL_TASKS_EXP_GP;
 439         for (i = 0; i < 4; i++) {
 440                 lp = &rnp->blocked_tasks[i];
 441                 lp_root = &rnp_root->blocked_tasks[i];
 442                 while (!list_empty(lp)) {
 443                         tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
 444                         raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 445                         list_del(&tp->rcu_node_entry);
 446                         tp->rcu_blocked_node = rnp_root;
 447                         list_add(&tp->rcu_node_entry, lp_root);
 448                         raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 449                 }
 450         }
 451         return retval;
 452 }
 453
 454 /*
 455  * Do CPU-offline processing for preemptable RCU.
 456  */
 457 static void rcu_preempt_offline_cpu(int cpu)
 458 {
 459         __rcu_offline_cpu(cpu, &rcu_preempt_state);
 460 }
 461
 462 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 463
 464 /*
 465  * Check for a quiescent state from the current CPU.  When a task blocks,
 466  * the task is recorded in the corresponding CPU's rcu_node structure,
 467  * which is checked elsewhere.
 468  *
 469  * Caller must disable hard irqs.
 470  */
 471 static void rcu_preempt_check_callbacks(int cpu)
 472 {
 473         struct task_struct *t = current;
 474
 475         if (t->rcu_read_lock_nesting == 0) {
 476                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 477                 rcu_preempt_qs(cpu);
 478                 return;
 479         }
 480         if (per_cpu(rcu_preempt_data, cpu).qs_pending)
 481                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 482 }
 483
 484 /*
 485  * Process callbacks for preemptable RCU.
 486  */
 487 static void rcu_preempt_process_callbacks(void)
 488 {
 489         __rcu_process_callbacks(&rcu_preempt_state,
 490                                 &__get_cpu_var(rcu_preempt_data));
 491 }
 492
 493 /*
 494  * Queue a preemptable-RCU callback for invocation after a grace period.
 495  */
 496 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 497 {
 498         __call_rcu(head, func, &rcu_preempt_state);
 499 }
 500 EXPORT_SYMBOL_GPL(call_rcu);
 501
 502 /**
 503  * synchronize_rcu - wait until a grace period has elapsed.
 504  *
 505  * Control will return to the caller some time after a full grace
 506  * period has elapsed, in other words after all currently executing RCU
 507  * read-side critical sections have completed.  RCU read-side critical
 508  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 509  * and may be nested.
 510  */
 511 void synchronize_rcu(void)
 512 {
 513         struct rcu_synchronize rcu;
 514
 515         if (!rcu_scheduler_active)
 516                 return;
 517
 518         init_completion(&rcu.completion);
 519         /* Will wake me after RCU finished. */
 520         call_rcu(&rcu.head, wakeme_after_rcu);
 521         /* Wait for it. */
 522         wait_for_completion(&rcu.completion);
 523 }
 524 EXPORT_SYMBOL_GPL(synchronize_rcu);
 525
 526 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
 527 static long sync_rcu_preempt_exp_count;
 528 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 529
 530 /*
 531  * Return non-zero if there are any tasks in RCU read-side critical
 532  * sections blocking the current preemptible-RCU expedited grace period.
 533  * If there is no preemptible-RCU expedited grace period currently in
 534  * progress, returns zero unconditionally.
 535  */
 536 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 537 {
 538         return !list_empty(&rnp->blocked_tasks[2]) ||
 539                !list_empty(&rnp->blocked_tasks[3]);
 540 }
 541
 542 /*
 543  * return non-zero if there is no RCU expedited grace period in progress
 544  * for the specified rcu_node structure, in other words, if all CPUs and
 545  * tasks covered by the specified rcu_node structure have done their bit
 546  * for the current expedited grace period.  Works only for preemptible
 547  * RCU -- other RCU implementation use other means.
 548  *
 549  * Caller must hold sync_rcu_preempt_exp_mutex.
 550  */
 551 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 552 {
 553         return !rcu_preempted_readers_exp(rnp) &&
 554                ACCESS_ONCE(rnp->expmask) == 0;
 555 }
 556
 557 /*
 558  * Report the exit from RCU read-side critical section for the last task
 559  * that queued itself during or before the current expedited preemptible-RCU
 560  * grace period.  This event is reported either to the rcu_node structure on
 561  * which the task was queued or to one of that rcu_node structure's ancestors,
 562  * recursively up the tree.  (Calm down, calm down, we do the recursion
 563  * iteratively!)
 564  *
 565  * Caller must hold sync_rcu_preempt_exp_mutex.
 566  */
 567 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 568 {
 569         unsigned long flags;
 570         unsigned long mask;
 571
 572         raw_spin_lock_irqsave(&rnp->lock, flags);
 573         for (;;) {
 574                 if (!sync_rcu_preempt_exp_done(rnp))
 575                         break;
 576                 if (rnp->parent == NULL) {
 577                         wake_up(&sync_rcu_preempt_exp_wq);
 578                         break;
 579                 }
 580                 mask = rnp->grpmask;
 581                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 582                 rnp = rnp->parent;
 583                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
 584                 rnp->expmask &= ~mask;
 585         }
 586         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 587 }
 588
 589 /*
 590  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 591  * grace period for the specified rcu_node structure.  If there are no such
 592  * tasks, report it up the rcu_node hierarchy.
 593  *
 594  * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 595  */
 596 static void
 597 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 598 {
 599         int must_wait;
 600
 601         raw_spin_lock(&rnp->lock); /* irqs already disabled */
 602         list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
 603         list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
 604         must_wait = rcu_preempted_readers_exp(rnp);
 605         raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 606         if (!must_wait)
 607                 rcu_report_exp_rnp(rsp, rnp);
 608 }
 609
 610 /*
 611  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 612  * is to invoke synchronize_sched_expedited() to push all the tasks to
 613  * the ->blocked_tasks[] lists, move all entries from the first set of
 614  * ->blocked_tasks[] lists to the second set, and finally wait for this
 615  * second set to drain.
 616  */
 617 void synchronize_rcu_expedited(void)
 618 {
 619         unsigned long flags;
 620         struct rcu_node *rnp;
 621         struct rcu_state *rsp = &rcu_preempt_state;
 622         long snap;
 623         int trycount = 0;
 624
 625         smp_mb(); /* Caller's modifications seen first by other CPUs. */
 626         snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
 627         smp_mb(); /* Above access cannot bleed into critical section. */
 628
 629         /*
 630          * Acquire lock, falling back to synchronize_rcu() if too many
 631          * lock-acquisition failures.  Of course, if someone does the
 632          * expedited grace period for us, just leave.
 633          */
 634         while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
 635                 if (trycount++ < 10)
 636                         udelay(trycount * num_online_cpus());
 637                 else {
 638                         synchronize_rcu();
 639                         return;
 640                 }
 641                 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 642                         goto mb_ret; /* Others did our work for us. */
 643         }
 644         if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 645                 goto unlock_mb_ret; /* Others did our work for us. */
 646
 647         /* force all RCU readers onto blocked_tasks[]. */
 648         synchronize_sched_expedited();
 649
 650         raw_spin_lock_irqsave(&rsp->onofflock, flags);
 651
 652         /* Initialize ->expmask for all non-leaf rcu_node structures. */
 653         rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
 654                 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
 655                 rnp->expmask = rnp->qsmaskinit;
 656                 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 657         }
 658
 659         /* Snapshot current state of ->blocked_tasks[] lists. */
 660         rcu_for_each_leaf_node(rsp, rnp)
 661                 sync_rcu_preempt_exp_init(rsp, rnp);
 662         if (NUM_RCU_NODES > 1)
 663                 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 664
 665         raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 666
 667         /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
 668         rnp = rcu_get_root(rsp);
 669         wait_event(sync_rcu_preempt_exp_wq,
 670                    sync_rcu_preempt_exp_done(rnp));
 671
 672         /* Clean up and exit. */
 673         smp_mb(); /* ensure expedited GP seen before counter increment. */
 674         ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
 675 unlock_mb_ret:
 676         mutex_unlock(&sync_rcu_preempt_exp_mutex);
 677 mb_ret:
 678         smp_mb(); /* ensure subsequent action seen after grace period. */
 679 }
 680 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 681
 682 /*
 683  * Check to see if there is any immediate preemptable-RCU-related work
 684  * to be done.
 685  */
 686 static int rcu_preempt_pending(int cpu)
 687 {
 688         return __rcu_pending(&rcu_preempt_state,
 689                              &per_cpu(rcu_preempt_data, cpu));
 690 }
 691
 692 /*
 693  * Does preemptable RCU need the CPU to stay out of dynticks mode?
 694  */
 695 static int rcu_preempt_needs_cpu(int cpu)
 696 {
 697         return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 698 }
 699
 700 /**
 701  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 702  */
 703 void rcu_barrier(void)
 704 {
 705         _rcu_barrier(&rcu_preempt_state, call_rcu);
 706 }
 707 EXPORT_SYMBOL_GPL(rcu_barrier);
 708
 709 /*
 710  * Initialize preemptable RCU's per-CPU data.
 711  */
 712 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 713 {
 714         rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
 715 }
 716
 717 /*
 718  * Move preemptable RCU's callbacks to ->orphan_cbs_list.
 719  */
 720 static void rcu_preempt_send_cbs_to_orphanage(void)
 721 {
 722         rcu_send_cbs_to_orphanage(&rcu_preempt_state);
 723 }
 724
 725 /*
 726  * Initialize preemptable RCU's state structures.
 727  */
 728 static void __init __rcu_init_preempt(void)
 729 {
 730         RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
 731 }
 732
 733 /*
 734  * Check for a task exiting while in a preemptable-RCU read-side
 735  * critical section, clean up if so.  No need to issue warnings,
 736  * as debug_check_no_locks_held() already does this if lockdep
 737  * is enabled.
 738  */
 739 void exit_rcu(void)
 740 {
 741         struct task_struct *t = current;
 742
 743         if (t->rcu_read_lock_nesting == 0)
 744                 return;
 745         t->rcu_read_lock_nesting = 1;
 746         rcu_read_unlock();
 747 }
 748
 749 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 750
 751 /*
 752  * Tell them what RCU they are running.
 753  */
 754 static void __init rcu_bootup_announce(void)
 755 {
 756         printk(KERN_INFO "Hierarchical RCU implementation.\n");
 757 }
 758
 759 /*
 760  * Return the number of RCU batches processed thus far for debug & stats.
 761  */
 762 long rcu_batches_completed(void)
 763 {
 764         return rcu_batches_completed_sched();
 765 }
 766 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 767
 768 /*
 769  * Force a quiescent state for RCU, which, because there is no preemptible
 770  * RCU, becomes the same as rcu-sched.
 771  */
 772 void rcu_force_quiescent_state(void)
 773 {
 774         rcu_sched_force_quiescent_state();
 775 }
 776 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 777
 778 /*
 779  * Because preemptable RCU does not exist, we never have to check for
 780  * CPUs being in quiescent states.
 781  */
 782 static void rcu_preempt_note_context_switch(int cpu)
 783 {
 784 }
 785
 786 /*
 787  * Because preemptable RCU does not exist, there are never any preempted
 788  * RCU readers.
 789  */
 790 static int rcu_preempted_readers(struct rcu_node *rnp)
 791 {
 792         return 0;
 793 }
 794
 795 #ifdef CONFIG_HOTPLUG_CPU
 796
 797 /* Because preemptible RCU does not exist, no quieting of tasks. */
 798 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 799 {
 800         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 801 }
 802
 803 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 804
 805 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 806
 807 /*
 808  * Because preemptable RCU does not exist, we never have to check for
 809  * tasks blocked within RCU read-side critical sections.
 810  */
 811 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 812 {
 813 }
 814
 815 /*
 816  * Because preemptable RCU does not exist, we never have to check for
 817  * tasks blocked within RCU read-side critical sections.
 818  */
 819 static void rcu_print_task_stall(struct rcu_node *rnp)
 820 {
 821 }
 822
 823 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 824
 825 /*
 826  * Because there is no preemptable RCU, there can be no readers blocked,
 827  * so there is no need to check for blocked tasks.  So check only for
 828  * bogus qsmask values.
 829  */
 830 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 831 {
 832         WARN_ON_ONCE(rnp->qsmask);
 833 }
 834
 835 #ifdef CONFIG_HOTPLUG_CPU
 836
 837 /*
 838  * Because preemptable RCU does not exist, it never needs to migrate
 839  * tasks that were blocked within RCU read-side critical sections, and
 840  * such non-existent tasks cannot possibly have been blocking the current
 841  * grace period.
 842  */
 843 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 844                                      struct rcu_node *rnp,
 845                                      struct rcu_data *rdp)
 846 {
 847         return 0;
 848 }
 849
 850 /*
 851  * Because preemptable RCU does not exist, it never needs CPU-offline
 852  * processing.
 853  */
 854 static void rcu_preempt_offline_cpu(int cpu)
 855 {
 856 }
 857
 858 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 859
 860 /*
 861  * Because preemptable RCU does not exist, it never has any callbacks
 862  * to check.
 863  */
 864 static void rcu_preempt_check_callbacks(int cpu)
 865 {
 866 }
 867
 868 /*
 869  * Because preemptable RCU does not exist, it never has any callbacks
 870  * to process.
 871  */
 872 static void rcu_preempt_process_callbacks(void)
 873 {
 874 }
 875
 876 /*
 877  * In classic RCU, call_rcu() is just call_rcu_sched().
 878  */
 879 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 880 {
 881         call_rcu_sched(head, func);
 882 }
 883 EXPORT_SYMBOL_GPL(call_rcu);
 884
 885 /*
 886  * Wait for an rcu-preempt grace period, but make it happen quickly.
 887  * But because preemptable RCU does not exist, map to rcu-sched.
 888  */
 889 void synchronize_rcu_expedited(void)
 890 {
 891         synchronize_sched_expedited();
 892 }
 893 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 894
 895 #ifdef CONFIG_HOTPLUG_CPU
 896
 897 /*
 898  * Because preemptable RCU does not exist, there is never any need to
 899  * report on tasks preempted in RCU read-side critical sections during
 900  * expedited RCU grace periods.
 901  */
 902 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 903 {
 904         return;
 905 }
 906
 907 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 908
 909 /*
 910  * Because preemptable RCU does not exist, it never has any work to do.
 911  */
 912 static int rcu_preempt_pending(int cpu)
 913 {
 914         return 0;
 915 }
 916
 917 /*
 918  * Because preemptable RCU does not exist, it never needs any CPU.
 919  */
 920 static int rcu_preempt_needs_cpu(int cpu)
 921 {
 922         return 0;
 923 }
 924
 925 /*
 926  * Because preemptable RCU does not exist, rcu_barrier() is just
 927  * another name for rcu_barrier_sched().
 928  */
 929 void rcu_barrier(void)
 930 {
 931         rcu_barrier_sched();
 932 }
 933 EXPORT_SYMBOL_GPL(rcu_barrier);
 934
 935 /*
 936  * Because preemptable RCU does not exist, there is no per-CPU
 937  * data to initialize.
 938  */
 939 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 940 {
 941 }
 942
 943 /*
 944  * Because there is no preemptable RCU, there are no callbacks to move.
 945  */
 946 static void rcu_preempt_send_cbs_to_orphanage(void)
 947 {
 948 }
 949
 950 /*
 951  * Because preemptable RCU does not exist, it need not be initialized.
 952  */
 953 static void __init __rcu_init_preempt(void)
 954 {
 955 }
 956
 957 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 958
 959 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 960
 961 /*
 962  * Check to see if any future RCU-related work will need to be done
 963  * by the current CPU, even if none need be done immediately, returning
 964  * 1 if so.  This function is part of the RCU implementation; it is -not-
 965  * an exported member of the RCU API.
 966  *
 967  * Because we have preemptible RCU, just check whether this CPU needs
 968  * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
 969  * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
 970  */
 971 int rcu_needs_cpu(int cpu)
 972 {
 973         return rcu_needs_cpu_quick_check(cpu);
 974 }
 975
 976 /*
 977  * Check to see if we need to continue a callback-flush operations to
 978  * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
 979  * entry is not configured, so we never do need to.
 980  */
 981 static void rcu_needs_cpu_flush(void)
 982 {
 983 }
 984
 985 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 986
 987 #define RCU_NEEDS_CPU_FLUSHES 5
 988 static DEFINE_PER_CPU(int, rcu_dyntick_drain);
 989 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
 990
 991 /*
 992  * Check to see if any future RCU-related work will need to be done
 993  * by the current CPU, even if none need be done immediately, returning
 994  * 1 if so.  This function is part of the RCU implementation; it is -not-
 995  * an exported member of the RCU API.
 996  *
 997  * Because we are not supporting preemptible RCU, attempt to accelerate
 998  * any current grace periods so that RCU no longer needs this CPU, but
 999  * only if all other CPUs are already in dynticks-idle mode.  This will
1000  * allow the CPU cores to be powered down immediately, as opposed to after
1001  * waiting many milliseconds for grace periods to elapse.
1002  *
1003  * Because it is not legal to invoke rcu_process_callbacks() with irqs
1004  * disabled, we do one pass of force_quiescent_state(), then do a
1005  * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
1006  * The per-cpu rcu_dyntick_drain variable controls the sequencing.
1007  */
1008 int rcu_needs_cpu(int cpu)
1009 {
1010         int c = 0;
1011         int thatcpu;
1012
1013         /* Check for being in the holdoff period. */
1014         if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1015                 return rcu_needs_cpu_quick_check(cpu);
1016
1017         /* Don't bother unless we are the last non-dyntick-idle CPU. */
1018         for_each_cpu_not(thatcpu, nohz_cpu_mask)
1019                 if (thatcpu != cpu) {
1020                         per_cpu(rcu_dyntick_drain, cpu) = 0;
1021                         per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1022                         return rcu_needs_cpu_quick_check(cpu);
1023                 }
1024
1025         /* Check and update the rcu_dyntick_drain sequencing. */
1026         if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1027                 /* First time through, initialize the counter. */
1028                 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
1029         } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1030                 /* We have hit the limit, so time to give up. */
1031                 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1032                 return rcu_needs_cpu_quick_check(cpu);
1033         }
1034
1035         /* Do one step pushing remaining RCU callbacks through. */
1036         if (per_cpu(rcu_sched_data, cpu).nxtlist) {
1037                 rcu_sched_qs(cpu);
1038                 force_quiescent_state(&rcu_sched_state, 0);
1039                 c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
1040         }
1041         if (per_cpu(rcu_bh_data, cpu).nxtlist) {
1042                 rcu_bh_qs(cpu);
1043                 force_quiescent_state(&rcu_bh_state, 0);
1044                 c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
1045         }
1046
1047         /* If RCU callbacks are still pending, RCU still needs this CPU. */
1048         if (c)
1049                 raise_softirq(RCU_SOFTIRQ);
1050         return c;
1051 }
1052
1053 /*
1054  * Check to see if we need to continue a callback-flush operations to
1055  * allow the last CPU to enter dyntick-idle mode.
1056  */
1057 static void rcu_needs_cpu_flush(void)
1058 {
1059         int cpu = smp_processor_id();
1060         unsigned long flags;
1061
1062         if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
1063                 return;
1064         local_irq_save(flags);
1065         (void)rcu_needs_cpu(cpu);
1066         local_irq_restore(flags);
1067 }
1068
1069 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */