sched: add RT task pushing
kernel/sched_rt.c
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        u64 delta_exec;

        if (!task_has_rt_policy(curr))
                return;

        delta_exec = rq->clock - curr->se.exec_start;
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;

        schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        curr->se.exec_start = rq->clock;
        cpuacct_charge(curr, delta_exec);
}
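
/*
 * Note: the RT class reuses the generic sched_entity fields for its
 * accounting: exec_start marks when the task last started running,
 * sum_exec_runtime accumulates its total CPU time, and exec_max (updated
 * via schedstat_set(), so only when schedstats are enabled) records the
 * longest single run observed.
 */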

static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
{
        WARN_ON(!rt_task(p));
        rq->rt.rt_nr_running++;
#ifdef CONFIG_SMP
        if (p->prio < rq->rt.highest_prio)
                rq->rt.highest_prio = p->prio;
#endif /* CONFIG_SMP */
}

static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
{
        WARN_ON(!rt_task(p));
        WARN_ON(!rq->rt.rt_nr_running);
        rq->rt.rt_nr_running--;
#ifdef CONFIG_SMP
        if (rq->rt.rt_nr_running) {
                struct rt_prio_array *array;

                WARN_ON(p->prio < rq->rt.highest_prio);
                if (p->prio == rq->rt.highest_prio) {
                        /* recalculate */
                        array = &rq->rt.active;
                        rq->rt.highest_prio =
                                sched_find_first_bit(array->bitmap);
                } /* otherwise leave rq->rt.highest_prio alone */
        } else
                rq->rt.highest_prio = MAX_RT_PRIO;
#endif /* CONFIG_SMP */
}
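
/*
 * rt.highest_prio caches the best (numerically lowest) priority currently
 * queued on this runqueue, or MAX_RT_PRIO when no RT task is queued.  It
 * is only maintained on SMP, where find_lock_lowest_rq() below uses it to
 * compare remote runqueues without taking their locks.
 */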

static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
        struct rt_prio_array *array = &rq->rt.active;

        list_add_tail(&p->run_list, array->queue + p->prio);
        __set_bit(p->prio, array->bitmap);
        inc_cpu_load(rq, p->se.load.weight);

        inc_rt_tasks(p, rq);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
        struct rt_prio_array *array = &rq->rt.active;

        update_curr_rt(rq);

        list_del(&p->run_list);
        if (list_empty(array->queue + p->prio))
                __clear_bit(p->prio, array->bitmap);
        dec_cpu_load(rq, p->se.load.weight);

        dec_rt_tasks(p, rq);
}

/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
{
        struct rt_prio_array *array = &rq->rt.active;

        list_move_tail(&p->run_list, array->queue + p->prio);
}

static void
yield_task_rt(struct rq *rq)
{
        requeue_task_rt(rq, rq->curr);
}

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
{
        if (p->prio < rq->curr->prio)
                resched_task(rq->curr);
}
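
/*
 * pick_next_task_rt() is O(1): sched_find_first_bit() locates the highest
 * priority level with queued tasks (lower number means higher priority),
 * and the task at the head of that list runs next.
 */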

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
        struct rt_prio_array *array = &rq->rt.active;
        struct task_struct *next;
        struct list_head *queue;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        if (idx >= MAX_RT_PRIO)
                return NULL;

        queue = array->queue + idx;
        next = list_entry(queue->next, struct task_struct, run_list);

        next->se.exec_start = rq->clock;

        return next;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);
        p->se.exec_start = 0;
}

#ifdef CONFIG_SMP
/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

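/*
 * The "second highest" task is the natural candidate to push away: the
 * highest-priority task is (or is about to be) running on this CPU, so we
 * look for the next queued RT task, either further down the same priority
 * list or at the next set bit in the bitmap.
 */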
/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
{
        struct rt_prio_array *array = &rq->rt.active;
        struct task_struct *next;
        struct list_head *queue;
        int idx;

        assert_spin_locked(&rq->lock);

        if (likely(rq->rt.rt_nr_running < 2))
                return NULL;

        idx = sched_find_first_bit(array->bitmap);
        if (unlikely(idx >= MAX_RT_PRIO)) {
                WARN_ON(1); /* rt_nr_running is bad */
                return NULL;
        }

        queue = array->queue + idx;
        next = list_entry(queue->next, struct task_struct, run_list);
        if (unlikely(next != rq->curr))
                return next;

        if (queue->next->next != queue) {
                /* same prio task */
                next = list_entry(queue->next->next, struct task_struct, run_list);
                return next;
        }

        /* slower, but more flexible */
        idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
        if (unlikely(idx >= MAX_RT_PRIO)) {
                WARN_ON(1); /* rt_nr_running was 2 and above! */
                return NULL;
        }

        queue = array->queue + idx;
        next = list_entry(queue->next, struct task_struct, run_list);

        return next;
}

static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task,
                                      struct rq *this_rq)
{
        struct rq *lowest_rq = NULL;
        int cpu;
        int tries;
        cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);

        cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);

        for (tries = 0; tries < RT_MAX_TRIES; tries++) {
                /*
                 * Scan each rq for the lowest prio.
                 */
                for_each_cpu_mask(cpu, *cpu_mask) {
                        struct rq *rq = &per_cpu(runqueues, cpu);

                        if (cpu == this_rq->cpu)
                                continue;

                        /* We look for lowest RT prio or non-rt CPU */
                        if (rq->rt.highest_prio >= MAX_RT_PRIO) {
                                lowest_rq = rq;
                                break;
                        }

                        /* no locking for now */
                        if (rq->rt.highest_prio > task->prio &&
                            (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
                                lowest_rq = rq;
                        }
                }

                if (!lowest_rq)
                        break;

                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(this_rq, lowest_rq)) {
                        /*
                         * We had to unlock the run queue. In the
                         * meantime, the task could have migrated already
                         * or had its affinity changed.  Also make sure
                         * that it wasn't scheduled on its rq.
                         */
                        if (unlikely(task_rq(task) != this_rq ||
                                     !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
                                     task_running(this_rq, task) ||
                                     !task->se.on_rq)) {
                                spin_unlock(&lowest_rq->lock);
                                lowest_rq = NULL;
                                break;
                        }
                }

                /* If this rq is still suitable use it. */
                if (lowest_rq->rt.highest_prio > task->prio)
                        break;

                /* try again */
                spin_unlock(&lowest_rq->lock);
                lowest_rq = NULL;
        }

        return lowest_rq;
}
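
/*
 * Note: a non-NULL return means lowest_rq->lock is held in addition to
 * this_rq->lock, and the target runqueue's highest priority is still lower
 * than 'task'; the caller must drop lowest_rq->lock when it is done.
 * double_lock_balance() (in sched.c) may drop this_rq->lock to take the
 * two locks in order, which is why the sanity re-checks above are needed.
 */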

/*
 * If the current CPU has more than one RT task, see if the non-running
 * task can migrate over to a CPU that is running a task of lesser
 * priority.
 */
static int push_rt_task(struct rq *this_rq)
{
        struct task_struct *next_task;
        struct rq *lowest_rq;
        int ret = 0;
        int paranoid = RT_MAX_TRIES;

        assert_spin_locked(&this_rq->lock);

        next_task = pick_next_highest_task_rt(this_rq);
        if (!next_task)
                return 0;

 retry:
        if (unlikely(next_task == this_rq->curr))
                return 0;

        /*
         * It's possible that next_task slipped in with a
         * higher priority than current. If that's the case
         * just reschedule current.
         */
        if (unlikely(next_task->prio < this_rq->curr->prio)) {
                resched_task(this_rq->curr);
                return 0;
        }

        /* We might release this_rq lock */
        get_task_struct(next_task);

        /* find_lock_lowest_rq locks the rq if found */
        lowest_rq = find_lock_lowest_rq(next_task, this_rq);
        if (!lowest_rq) {
                struct task_struct *task;
                /*
                 * find_lock_lowest_rq may release this_rq->lock,
                 * so it is possible that next_task has changed.
                 * If it has, then try again.
                 */
                task = pick_next_highest_task_rt(this_rq);
                if (unlikely(task != next_task) && task && paranoid--) {
                        put_task_struct(next_task);
                        next_task = task;
                        goto retry;
                }
                goto out;
        }

        assert_spin_locked(&lowest_rq->lock);

        deactivate_task(this_rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);

        resched_task(lowest_rq->curr);

        spin_unlock(&lowest_rq->lock);

        ret = 1;
out:
        put_task_struct(next_task);

        return ret;
}
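
/*
 * Push summary: pick a queued (non-running) RT task, find a runqueue whose
 * highest priority is lower than that task's, migrate the task there with
 * deactivate_task()/set_task_cpu()/activate_task(), then poke the target
 * CPU with resched_task().  The 'paranoid' counter bounds how many times
 * we re-pick after the candidate changes under us.
 */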

/*
 * TODO: Currently we just use the second highest prio task on
 *       the queue, and stop when it can't migrate (or there are
 *       no more RT tasks).  There may be a case where a lower
 *       priority RT task has a different affinity than the
 *       higher RT task. In this case the lower RT task could
 *       possibly be able to migrate whereas the higher priority
 *       RT task could not.  We currently ignore this issue.
 *       Enhancements are welcome!
 */
static void push_rt_tasks(struct rq *rq)
{
        /* push_rt_task() will return true if it moved an RT task */
        while (push_rt_task(rq))
                ;
}

static void schedule_tail_balance_rt(struct rq *rq)
{
        /*
         * If we have more than one rt_task queued, then
         * see if we can push the other rt_tasks off to other CPUs.
         * Note we may release the rq lock, and since
         * the lock was owned by prev, we need to release it
         * first via finish_lock_switch and then reacquire it here.
         */
        if (unlikely(rq->rt.rt_nr_running > 1)) {
                spin_lock_irq(&rq->lock);
                push_rt_tasks(rq);
                spin_unlock_irq(&rq->lock);
        }
}
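
/*
 * Note: judging by its name and the locking comment above, this hook is
 * meant to run from the schedule_tail() path, after finish_lock_switch()
 * has released the rq lock held across the context switch; it then retakes
 * rq->lock itself before pushing.
 */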

/*
 * Load-balancing iterator. Note: while the runqueue stays locked
 * during the whole iteration, the current task might be
 * dequeued so the iterator has to be dequeue-safe. Here we
 * achieve that by always pre-iterating before returning
 * the current task:
 */
static struct task_struct *load_balance_start_rt(void *arg)
{
        struct rq *rq = arg;
        struct rt_prio_array *array = &rq->rt.active;
        struct list_head *head, *curr;
        struct task_struct *p;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        if (idx >= MAX_RT_PRIO)
                return NULL;

        head = array->queue + idx;
        curr = head->prev;

        p = list_entry(curr, struct task_struct, run_list);

        curr = curr->prev;

        rq->rt.rt_load_balance_idx = idx;
        rq->rt.rt_load_balance_head = head;
        rq->rt.rt_load_balance_curr = curr;

        return p;
}

static struct task_struct *load_balance_next_rt(void *arg)
{
        struct rq *rq = arg;
        struct rt_prio_array *array = &rq->rt.active;
        struct list_head *head, *curr;
        struct task_struct *p;
        int idx;

        idx = rq->rt.rt_load_balance_idx;
        head = rq->rt.rt_load_balance_head;
        curr = rq->rt.rt_load_balance_curr;

        /*
         * If we have arrived back at the head then
         * iterate to the next queue (if any):
         */
        if (unlikely(head == curr)) {
                int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);

                if (next_idx >= MAX_RT_PRIO)
                        return NULL;

                idx = next_idx;
                head = array->queue + idx;
                curr = head->prev;

                rq->rt.rt_load_balance_idx = idx;
                rq->rt.rt_load_balance_head = head;
        }

        p = list_entry(curr, struct task_struct, run_list);

        curr = curr->prev;

        rq->rt.rt_load_balance_curr = curr;

        return p;
}
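
/*
 * The iterator walks each priority list tail-first (head->prev) and always
 * advances 'curr' one step before handing out a task, so the task just
 * returned can be dequeued (migrated) without invalidating the cursor.
 */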

static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                unsigned long max_load_move,
                struct sched_domain *sd, enum cpu_idle_type idle,
                int *all_pinned, int *this_best_prio)
{
        struct rq_iterator rt_rq_iterator;

        rt_rq_iterator.start = load_balance_start_rt;
        rt_rq_iterator.next = load_balance_next_rt;
        /*
         * Pass the 'busiest' rq argument into the
         * load_balance_[start|next]_rt iterators.
         */
        rt_rq_iterator.arg = busiest;

        return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
                             idle, all_pinned, this_best_prio, &rt_rq_iterator);
}

static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                 struct sched_domain *sd, enum cpu_idle_type idle)
{
        struct rq_iterator rt_rq_iterator;

        rt_rq_iterator.start = load_balance_start_rt;
        rt_rq_iterator.next = load_balance_next_rt;
        rt_rq_iterator.arg = busiest;

        return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
                                  &rt_rq_iterator);
}
#else /* CONFIG_SMP */
# define schedule_tail_balance_rt(rq)   do { } while (0)
#endif /* CONFIG_SMP */

static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);

        /*
         * RR tasks need a special form of timeslice management.
         * FIFO tasks have no timeslices.
         */
        if (p->policy != SCHED_RR)
                return;

        if (--p->time_slice)
                return;

        p->time_slice = DEF_TIMESLICE;

        /*
         * Requeue to the end of queue if we are not the only element
         * on the queue:
         */
        if (p->run_list.prev != p->run_list.next) {
                requeue_task_rt(rq, p);
                set_tsk_need_resched(p);
        }
}
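
/*
 * In other words: SCHED_FIFO tasks run until they block or yield, while a
 * SCHED_RR task whose DEF_TIMESLICE budget expires is refilled and rotated
 * to the tail of its priority list, and only gets the resched flag when
 * another task is actually queued on that list behind it.
 */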

static void set_curr_task_rt(struct rq *rq)
{
        struct task_struct *p = rq->curr;

        p->se.exec_start = rq->clock;
}

const struct sched_class rt_sched_class = {
        .next                   = &fair_sched_class,
        .enqueue_task           = enqueue_task_rt,
        .dequeue_task           = dequeue_task_rt,
        .yield_task             = yield_task_rt,

        .check_preempt_curr     = check_preempt_curr_rt,

        .pick_next_task         = pick_next_task_rt,
        .put_prev_task          = put_prev_task_rt,

#ifdef CONFIG_SMP
        .load_balance           = load_balance_rt,
        .move_one_task          = move_one_task_rt,
#endif

        .set_curr_task          = set_curr_task_rt,
        .task_tick              = task_tick_rt,
};
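
/*
 * The .next pointer chains the scheduling classes in priority order: the
 * core scheduler asks rt_sched_class for a runnable task first and falls
 * through to fair_sched_class (and then the idle class) only when no RT
 * task is queued here.
 */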