SAFE public projects git trees. - safe/jmp/linux-2.6/blob - kernel/padata.c

   1 /*
   2  * padata.c - generic interface to process data streams in parallel
   3  *
   4  * Copyright (C) 2008, 2009 secunet Security Networks AG
   5  * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
   6  *
   7  * This program is free software; you can redistribute it and/or modify it
   8  * under the terms and conditions of the GNU General Public License,
   9  * version 2, as published by the Free Software Foundation.
  10  *
  11  * This program is distributed in the hope it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14  * more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along with
  17  * this program; if not, write to the Free Software Foundation, Inc.,
  18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  19  */
  20
  21 #include <linux/module.h>
  22 #include <linux/cpumask.h>
  23 #include <linux/err.h>
  24 #include <linux/cpu.h>
  25 #include <linux/padata.h>
  26 #include <linux/mutex.h>
  27 #include <linux/sched.h>
  28 #include <linux/slab.h>
  29 #include <linux/rcupdate.h>
  30
  31 #define MAX_SEQ_NR INT_MAX - NR_CPUS
  32 #define MAX_OBJ_NUM 1000
  33
  34 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
  35 {
  36         int cpu, target_cpu;
  37
  38         target_cpu = cpumask_first(pd->cpumask);
  39         for (cpu = 0; cpu < cpu_index; cpu++)
  40                 target_cpu = cpumask_next(target_cpu, pd->cpumask);
  41
  42         return target_cpu;
  43 }
  44
  45 static int padata_cpu_hash(struct padata_priv *padata)
  46 {
  47         int cpu_index;
  48         struct parallel_data *pd;
  49
  50         pd =  padata->pd;
  51
  52         /*
  53          * Hash the sequence numbers to the cpus by taking
  54          * seq_nr mod. number of cpus in use.
  55          */
  56         cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask);
  57
  58         return padata_index_to_cpu(pd, cpu_index);
  59 }
  60
  61 static void padata_parallel_worker(struct work_struct *work)
  62 {
  63         struct padata_queue *queue;
  64         struct parallel_data *pd;
  65         struct padata_instance *pinst;
  66         LIST_HEAD(local_list);
  67
  68         local_bh_disable();
  69         queue = container_of(work, struct padata_queue, pwork);
  70         pd = queue->pd;
  71         pinst = pd->pinst;
  72
  73         spin_lock(&queue->parallel.lock);
  74         list_replace_init(&queue->parallel.list, &local_list);
  75         spin_unlock(&queue->parallel.lock);
  76
  77         while (!list_empty(&local_list)) {
  78                 struct padata_priv *padata;
  79
  80                 padata = list_entry(local_list.next,
  81                                     struct padata_priv, list);
  82
  83                 list_del_init(&padata->list);
  84
  85                 padata->parallel(padata);
  86         }
  87
  88         local_bh_enable();
  89 }
  90
  91 /*
  92  * padata_do_parallel - padata parallelization function
  93  *
  94  * @pinst: padata instance
  95  * @padata: object to be parallelized
  96  * @cb_cpu: cpu the serialization callback function will run on,
  97  *          must be in the cpumask of padata.
  98  *
  99  * The parallelization callback function will run with BHs off.
 100  * Note: Every object which is parallelized by padata_do_parallel
 101  * must be seen by padata_do_serial.
 102  */
 103 int padata_do_parallel(struct padata_instance *pinst,
 104                        struct padata_priv *padata, int cb_cpu)
 105 {
 106         int target_cpu, err;
 107         struct padata_queue *queue;
 108         struct parallel_data *pd;
 109
 110         rcu_read_lock_bh();
 111
 112         pd = rcu_dereference(pinst->pd);
 113
 114         err = 0;
 115         if (!(pinst->flags & PADATA_INIT))
 116                 goto out;
 117
 118         err =  -EBUSY;
 119         if ((pinst->flags & PADATA_RESET))
 120                 goto out;
 121
 122         if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 123                 goto out;
 124
 125         err = -EINVAL;
 126         if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
 127                 goto out;
 128
 129         err = -EINPROGRESS;
 130         atomic_inc(&pd->refcnt);
 131         padata->pd = pd;
 132         padata->cb_cpu = cb_cpu;
 133
 134         if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr))
 135                 atomic_set(&pd->seq_nr, -1);
 136
 137         padata->seq_nr = atomic_inc_return(&pd->seq_nr);
 138
 139         target_cpu = padata_cpu_hash(padata);
 140         queue = per_cpu_ptr(pd->queue, target_cpu);
 141
 142         spin_lock(&queue->parallel.lock);
 143         list_add_tail(&padata->list, &queue->parallel.list);
 144         spin_unlock(&queue->parallel.lock);
 145
 146         queue_work_on(target_cpu, pinst->wq, &queue->pwork);
 147
 148 out:
 149         rcu_read_unlock_bh();
 150
 151         return err;
 152 }
 153 EXPORT_SYMBOL(padata_do_parallel);
 154
 155 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 156 {
 157         int cpu, num_cpus, empty, calc_seq_nr;
 158         int seq_nr, next_nr, overrun, next_overrun;
 159         struct padata_queue *queue, *next_queue;
 160         struct padata_priv *padata;
 161         struct padata_list *reorder;
 162
 163         empty = 0;
 164         next_nr = -1;
 165         next_overrun = 0;
 166         next_queue = NULL;
 167
 168         num_cpus = cpumask_weight(pd->cpumask);
 169
 170         for_each_cpu(cpu, pd->cpumask) {
 171                 queue = per_cpu_ptr(pd->queue, cpu);
 172                 reorder = &queue->reorder;
 173
 174                 /*
 175                  * Calculate the seq_nr of the object that should be
 176                  * next in this queue.
 177                  */
 178                 overrun = 0;
 179                 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
 180                                + queue->cpu_index;
 181
 182                 if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
 183                         calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
 184                         overrun = 1;
 185                 }
 186
 187                 if (!list_empty(&reorder->list)) {
 188                         padata = list_entry(reorder->list.next,
 189                                             struct padata_priv, list);
 190
 191                         seq_nr  = padata->seq_nr;
 192                         BUG_ON(calc_seq_nr != seq_nr);
 193                 } else {
 194                         seq_nr = calc_seq_nr;
 195                         empty++;
 196                 }
 197
 198                 if (next_nr < 0 || seq_nr < next_nr
 199                     || (next_overrun && !overrun)) {
 200                         next_nr = seq_nr;
 201                         next_overrun = overrun;
 202                         next_queue = queue;
 203                 }
 204         }
 205
 206         padata = NULL;
 207
 208         if (empty == num_cpus)
 209                 goto out;
 210
 211         reorder = &next_queue->reorder;
 212
 213         if (!list_empty(&reorder->list)) {
 214                 padata = list_entry(reorder->list.next,
 215                                     struct padata_priv, list);
 216
 217                 if (unlikely(next_overrun)) {
 218                         for_each_cpu(cpu, pd->cpumask) {
 219                                 queue = per_cpu_ptr(pd->queue, cpu);
 220                                 atomic_set(&queue->num_obj, 0);
 221                         }
 222                 }
 223
 224                 spin_lock(&reorder->lock);
 225                 list_del_init(&padata->list);
 226                 atomic_dec(&pd->reorder_objects);
 227                 spin_unlock(&reorder->lock);
 228
 229                 atomic_inc(&next_queue->num_obj);
 230
 231                 goto out;
 232         }
 233
 234         queue = per_cpu_ptr(pd->queue, smp_processor_id());
 235         if (queue->cpu_index == next_queue->cpu_index) {
 236                 padata = ERR_PTR(-ENODATA);
 237                 goto out;
 238         }
 239
 240         padata = ERR_PTR(-EINPROGRESS);
 241 out:
 242         return padata;
 243 }
 244
 245 static void padata_reorder(struct parallel_data *pd)
 246 {
 247         struct padata_priv *padata;
 248         struct padata_queue *queue;
 249         struct padata_instance *pinst = pd->pinst;
 250
 251         if (!spin_trylock_bh(&pd->lock))
 252                 return;
 253
 254         while (1) {
 255                 padata = padata_get_next(pd);
 256
 257                 if (!padata || PTR_ERR(padata) == -EINPROGRESS)
 258                         break;
 259
 260                 if (PTR_ERR(padata) == -ENODATA) {
 261                         del_timer(&pd->timer);
 262                         spin_unlock_bh(&pd->lock);
 263                         return;
 264                 }
 265
 266                 queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
 267
 268                 spin_lock(&queue->serial.lock);
 269                 list_add_tail(&padata->list, &queue->serial.list);
 270                 spin_unlock(&queue->serial.lock);
 271
 272                 queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
 273         }
 274
 275         spin_unlock_bh(&pd->lock);
 276
 277         if (atomic_read(&pd->reorder_objects)
 278                         && !(pinst->flags & PADATA_RESET))
 279                 mod_timer(&pd->timer, jiffies + HZ);
 280         else
 281                 del_timer(&pd->timer);
 282
 283         return;
 284 }
 285
 286 static void padata_reorder_timer(unsigned long arg)
 287 {
 288         struct parallel_data *pd = (struct parallel_data *)arg;
 289
 290         padata_reorder(pd);
 291 }
 292
 293 static void padata_serial_worker(struct work_struct *work)
 294 {
 295         struct padata_queue *queue;
 296         struct parallel_data *pd;
 297         LIST_HEAD(local_list);
 298
 299         local_bh_disable();
 300         queue = container_of(work, struct padata_queue, swork);
 301         pd = queue->pd;
 302
 303         spin_lock(&queue->serial.lock);
 304         list_replace_init(&queue->serial.list, &local_list);
 305         spin_unlock(&queue->serial.lock);
 306
 307         while (!list_empty(&local_list)) {
 308                 struct padata_priv *padata;
 309
 310                 padata = list_entry(local_list.next,
 311                                     struct padata_priv, list);
 312
 313                 list_del_init(&padata->list);
 314
 315                 padata->serial(padata);
 316                 atomic_dec(&pd->refcnt);
 317         }
 318         local_bh_enable();
 319 }
 320
 321 /*
 322  * padata_do_serial - padata serialization function
 323  *
 324  * @padata: object to be serialized.
 325  *
 326  * padata_do_serial must be called for every parallelized object.
 327  * The serialization callback function will run with BHs off.
 328  */
 329 void padata_do_serial(struct padata_priv *padata)
 330 {
 331         int cpu;
 332         struct padata_queue *queue;
 333         struct parallel_data *pd;
 334
 335         pd = padata->pd;
 336
 337         cpu = get_cpu();
 338         queue = per_cpu_ptr(pd->queue, cpu);
 339
 340         spin_lock(&queue->reorder.lock);
 341         atomic_inc(&pd->reorder_objects);
 342         list_add_tail(&padata->list, &queue->reorder.list);
 343         spin_unlock(&queue->reorder.lock);
 344
 345         put_cpu();
 346
 347         padata_reorder(pd);
 348 }
 349 EXPORT_SYMBOL(padata_do_serial);
 350
 351 static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 352                                              const struct cpumask *cpumask)
 353 {
 354         int cpu, cpu_index, num_cpus;
 355         struct padata_queue *queue;
 356         struct parallel_data *pd;
 357
 358         cpu_index = 0;
 359
 360         pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
 361         if (!pd)
 362                 goto err;
 363
 364         pd->queue = alloc_percpu(struct padata_queue);
 365         if (!pd->queue)
 366                 goto err_free_pd;
 367
 368         if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
 369                 goto err_free_queue;
 370
 371         cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
 372
 373         for_each_cpu(cpu, pd->cpumask) {
 374                 queue = per_cpu_ptr(pd->queue, cpu);
 375
 376                 queue->pd = pd;
 377
 378                 queue->cpu_index = cpu_index;
 379                 cpu_index++;
 380
 381                 INIT_LIST_HEAD(&queue->reorder.list);
 382                 INIT_LIST_HEAD(&queue->parallel.list);
 383                 INIT_LIST_HEAD(&queue->serial.list);
 384                 spin_lock_init(&queue->reorder.lock);
 385                 spin_lock_init(&queue->parallel.lock);
 386                 spin_lock_init(&queue->serial.lock);
 387
 388                 INIT_WORK(&queue->pwork, padata_parallel_worker);
 389                 INIT_WORK(&queue->swork, padata_serial_worker);
 390                 atomic_set(&queue->num_obj, 0);
 391         }
 392
 393         num_cpus = cpumask_weight(pd->cpumask);
 394         pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
 395
 396         setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
 397         atomic_set(&pd->seq_nr, -1);
 398         atomic_set(&pd->reorder_objects, 0);
 399         atomic_set(&pd->refcnt, 0);
 400         pd->pinst = pinst;
 401         spin_lock_init(&pd->lock);
 402
 403         return pd;
 404
 405 err_free_queue:
 406         free_percpu(pd->queue);
 407 err_free_pd:
 408         kfree(pd);
 409 err:
 410         return NULL;
 411 }
 412
 413 static void padata_free_pd(struct parallel_data *pd)
 414 {
 415         free_cpumask_var(pd->cpumask);
 416         free_percpu(pd->queue);
 417         kfree(pd);
 418 }
 419
 420 static void padata_replace(struct padata_instance *pinst,
 421                            struct parallel_data *pd_new)
 422 {
 423         struct parallel_data *pd_old = pinst->pd;
 424
 425         pinst->flags |= PADATA_RESET;
 426
 427         rcu_assign_pointer(pinst->pd, pd_new);
 428
 429         synchronize_rcu();
 430
 431         while (atomic_read(&pd_old->refcnt) != 0)
 432                 yield();
 433
 434         flush_workqueue(pinst->wq);
 435
 436         padata_free_pd(pd_old);
 437
 438         pinst->flags &= ~PADATA_RESET;
 439 }
 440
 441 /*
 442  * padata_set_cpumask - set the cpumask that padata should use
 443  *
 444  * @pinst: padata instance
 445  * @cpumask: the cpumask to use
 446  */
 447 int padata_set_cpumask(struct padata_instance *pinst,
 448                         cpumask_var_t cpumask)
 449 {
 450         struct parallel_data *pd;
 451         int err = 0;
 452
 453         mutex_lock(&pinst->lock);
 454
 455         get_online_cpus();
 456
 457         pd = padata_alloc_pd(pinst, cpumask);
 458         if (!pd) {
 459                 err = -ENOMEM;
 460                 goto out;
 461         }
 462
 463         cpumask_copy(pinst->cpumask, cpumask);
 464
 465         padata_replace(pinst, pd);
 466
 467 out:
 468         put_online_cpus();
 469
 470         mutex_unlock(&pinst->lock);
 471
 472         return err;
 473 }
 474 EXPORT_SYMBOL(padata_set_cpumask);
 475
 476 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 477 {
 478         struct parallel_data *pd;
 479
 480         if (cpumask_test_cpu(cpu, cpu_active_mask)) {
 481                 pd = padata_alloc_pd(pinst, pinst->cpumask);
 482                 if (!pd)
 483                         return -ENOMEM;
 484
 485                 padata_replace(pinst, pd);
 486         }
 487
 488         return 0;
 489 }
 490
 491 /*
 492  * padata_add_cpu - add a cpu to the padata cpumask
 493  *
 494  * @pinst: padata instance
 495  * @cpu: cpu to add
 496  */
 497 int padata_add_cpu(struct padata_instance *pinst, int cpu)
 498 {
 499         int err;
 500
 501         mutex_lock(&pinst->lock);
 502
 503         get_online_cpus();
 504         cpumask_set_cpu(cpu, pinst->cpumask);
 505         err = __padata_add_cpu(pinst, cpu);
 506         put_online_cpus();
 507
 508         mutex_unlock(&pinst->lock);
 509
 510         return err;
 511 }
 512 EXPORT_SYMBOL(padata_add_cpu);
 513
 514 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 515 {
 516         struct parallel_data *pd;
 517
 518         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 519                 pd = padata_alloc_pd(pinst, pinst->cpumask);
 520                 if (!pd)
 521                         return -ENOMEM;
 522
 523                 padata_replace(pinst, pd);
 524         }
 525
 526         return 0;
 527 }
 528
 529 /*
 530  * padata_remove_cpu - remove a cpu from the padata cpumask
 531  *
 532  * @pinst: padata instance
 533  * @cpu: cpu to remove
 534  */
 535 int padata_remove_cpu(struct padata_instance *pinst, int cpu)
 536 {
 537         int err;
 538
 539         mutex_lock(&pinst->lock);
 540
 541         get_online_cpus();
 542         cpumask_clear_cpu(cpu, pinst->cpumask);
 543         err = __padata_remove_cpu(pinst, cpu);
 544         put_online_cpus();
 545
 546         mutex_unlock(&pinst->lock);
 547
 548         return err;
 549 }
 550 EXPORT_SYMBOL(padata_remove_cpu);
 551
 552 /*
 553  * padata_start - start the parallel processing
 554  *
 555  * @pinst: padata instance to start
 556  */
 557 void padata_start(struct padata_instance *pinst)
 558 {
 559         mutex_lock(&pinst->lock);
 560         pinst->flags |= PADATA_INIT;
 561         mutex_unlock(&pinst->lock);
 562 }
 563 EXPORT_SYMBOL(padata_start);
 564
 565 /*
 566  * padata_stop - stop the parallel processing
 567  *
 568  * @pinst: padata instance to stop
 569  */
 570 void padata_stop(struct padata_instance *pinst)
 571 {
 572         mutex_lock(&pinst->lock);
 573         pinst->flags &= ~PADATA_INIT;
 574         mutex_unlock(&pinst->lock);
 575 }
 576 EXPORT_SYMBOL(padata_stop);
 577
 578 #ifdef CONFIG_HOTPLUG_CPU
 579 static int padata_cpu_callback(struct notifier_block *nfb,
 580                                unsigned long action, void *hcpu)
 581 {
 582         int err;
 583         struct padata_instance *pinst;
 584         int cpu = (unsigned long)hcpu;
 585
 586         pinst = container_of(nfb, struct padata_instance, cpu_notifier);
 587
 588         switch (action) {
 589         case CPU_ONLINE:
 590         case CPU_ONLINE_FROZEN:
 591                 if (!cpumask_test_cpu(cpu, pinst->cpumask))
 592                         break;
 593                 mutex_lock(&pinst->lock);
 594                 err = __padata_add_cpu(pinst, cpu);
 595                 mutex_unlock(&pinst->lock);
 596                 if (err)
 597                         return NOTIFY_BAD;
 598                 break;
 599
 600         case CPU_DOWN_PREPARE:
 601         case CPU_DOWN_PREPARE_FROZEN:
 602                 if (!cpumask_test_cpu(cpu, pinst->cpumask))
 603                         break;
 604                 mutex_lock(&pinst->lock);
 605                 err = __padata_remove_cpu(pinst, cpu);
 606                 mutex_unlock(&pinst->lock);
 607                 if (err)
 608                         return NOTIFY_BAD;
 609                 break;
 610
 611         case CPU_UP_CANCELED:
 612         case CPU_UP_CANCELED_FROZEN:
 613                 if (!cpumask_test_cpu(cpu, pinst->cpumask))
 614                         break;
 615                 mutex_lock(&pinst->lock);
 616                 __padata_remove_cpu(pinst, cpu);
 617                 mutex_unlock(&pinst->lock);
 618
 619         case CPU_DOWN_FAILED:
 620         case CPU_DOWN_FAILED_FROZEN:
 621                 if (!cpumask_test_cpu(cpu, pinst->cpumask))
 622                         break;
 623                 mutex_lock(&pinst->lock);
 624                 __padata_add_cpu(pinst, cpu);
 625                 mutex_unlock(&pinst->lock);
 626         }
 627
 628         return NOTIFY_OK;
 629 }
 630 #endif
 631
 632 /*
 633  * padata_alloc - allocate and initialize a padata instance
 634  *
 635  * @cpumask: cpumask that padata uses for parallelization
 636  * @wq: workqueue to use for the allocated padata instance
 637  */
 638 struct padata_instance *padata_alloc(const struct cpumask *cpumask,
 639                                      struct workqueue_struct *wq)
 640 {
 641         struct padata_instance *pinst;
 642         struct parallel_data *pd;
 643
 644         pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
 645         if (!pinst)
 646                 goto err;
 647
 648         get_online_cpus();
 649
 650         pd = padata_alloc_pd(pinst, cpumask);
 651         if (!pd)
 652                 goto err_free_inst;
 653
 654         if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
 655                 goto err_free_pd;
 656
 657         rcu_assign_pointer(pinst->pd, pd);
 658
 659         pinst->wq = wq;
 660
 661         cpumask_copy(pinst->cpumask, cpumask);
 662
 663         pinst->flags = 0;
 664
 665 #ifdef CONFIG_HOTPLUG_CPU
 666         pinst->cpu_notifier.notifier_call = padata_cpu_callback;
 667         pinst->cpu_notifier.priority = 0;
 668         register_hotcpu_notifier(&pinst->cpu_notifier);
 669 #endif
 670
 671         put_online_cpus();
 672
 673         mutex_init(&pinst->lock);
 674
 675         return pinst;
 676
 677 err_free_pd:
 678         padata_free_pd(pd);
 679 err_free_inst:
 680         kfree(pinst);
 681         put_online_cpus();
 682 err:
 683         return NULL;
 684 }
 685 EXPORT_SYMBOL(padata_alloc);
 686
 687 /*
 688  * padata_free - free a padata instance
 689  *
 690  * @ padata_inst: padata instance to free
 691  */
 692 void padata_free(struct padata_instance *pinst)
 693 {
 694         padata_stop(pinst);
 695
 696         synchronize_rcu();
 697
 698         while (atomic_read(&pinst->pd->refcnt) != 0)
 699                 yield();
 700
 701 #ifdef CONFIG_HOTPLUG_CPU
 702         unregister_hotcpu_notifier(&pinst->cpu_notifier);
 703 #endif
 704         padata_free_pd(pinst->pd);
 705         free_cpumask_var(pinst->cpumask);
 706         kfree(pinst);
 707 }
 708 EXPORT_SYMBOL(padata_free);