diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6fd158b..d4dc69d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -9,7 +9,7 @@
  * Derived from the taskqueue/keventd code by:
  *
  *   David Woodhouse <dwmw2@infradead.org>
- *   Andrew Morton <andrewm@uow.edu.au>
+ *   Andrew Morton
  *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
  *   Theodore Ts'o <tytso@mit.edu>
  *
@@ -62,6 +62,7 @@ struct workqueue_struct {
        const char *name;
        int singlethread;
        int freezeable;         /* Freeze threads during suspend */
+       int rt;
 #ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
 #endif
@@ -125,7 +126,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 }
 
 static void insert_work(struct cpu_workqueue_struct *cwq,
-                               struct work_struct *work, int tail)
+                       struct work_struct *work, struct list_head *head)
 {
        set_wq_data(work, cwq);
        /*
@@ -133,10 +134,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
         * result of list_add() below, see try_to_grab_pending().
         */
        smp_wmb();
-       if (tail)
-               list_add_tail(&work->entry, &cwq->worklist);
-       else
-               list_add(&work->entry, &cwq->worklist);
+       list_add_tail(&work->entry, head);
        wake_up(&cwq->more_work);
 }
 
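Under the new convention, insert_work()'s third argument is the list node in front of which the work is queued (list_add_tail() inserts before @head), so each caller states its placement directly. The three patterns this patch uses, for reference:

	insert_work(cwq, work, &cwq->worklist);		/* append at the tail */
	insert_work(cwq, work, cwq->worklist.next);	/* insert at the head */
	insert_work(cwq, work, prev->next);		/* insert right after prev */
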
@@ -146,7 +144,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
        unsigned long flags;
 
        spin_lock_irqsave(&cwq->lock, flags);
-       insert_work(cwq, work, 1);
+       insert_work(cwq, work, &cwq->worklist);
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -162,14 +160,11 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
  */
 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-       int ret = 0;
+       int ret;
+
+       ret = queue_work_on(get_cpu(), wq, work);
+       put_cpu();
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-               BUG_ON(!list_empty(&work->entry));
-               __queue_work(wq_per_cpu(wq, get_cpu()), work);
-               put_cpu();
-               ret = 1;
-       }
        return ret;
 }
 EXPORT_SYMBOL_GPL(queue_work);
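
queue_work() is now a thin wrapper: the WORK_STRUCT_PENDING test-and-set and the call to __queue_work() move into queue_work_on(). A hedged sketch of the two entry points, assuming the prototype int queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work), with my_wq and my_work hypothetical:

	queue_work(my_wq, &my_work);		/* submitting CPU, as before */
	queue_work_on(2, my_wq, &my_work);	/* explicitly target CPU 2 */
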
@@ -296,11 +291,11 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 
                BUG_ON(get_wq_data(work) != cwq);
                work_clear_pending(work);
-               lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
-               lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+               lock_map_acquire(&cwq->wq->lockdep_map);
+               lock_map_acquire(&lockdep_map);
                f(work);
-               lock_release(&lockdep_map, 1, _THIS_IP_);
-               lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+               lock_map_release(&lockdep_map);
+               lock_map_release(&cwq->wq->lockdep_map);
 
                if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                        printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -361,14 +356,14 @@ static void wq_barrier_func(struct work_struct *work)
 }
 
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-                                       struct wq_barrier *barr, int tail)
+                       struct wq_barrier *barr, struct list_head *head)
 {
        INIT_WORK(&barr->work, wq_barrier_func);
        __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
 
        init_completion(&barr->done);
 
-       insert_work(cwq, &barr->work, tail);
+       insert_work(cwq, &barr->work, head);
 }
 
 static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -388,7 +383,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
                active = 0;
                spin_lock_irq(&cwq->lock);
                if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
-                       insert_wq_barrier(cwq, &barr, 1);
+                       insert_wq_barrier(cwq, &barr, &cwq->worklist);
                        active = 1;
                }
                spin_unlock_irq(&cwq->lock);
@@ -419,13 +414,64 @@ void flush_workqueue(struct workqueue_struct *wq)
        int cpu;
 
        might_sleep();
-       lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
-       lock_release(&wq->lockdep_map, 1, _THIS_IP_);
+       lock_map_acquire(&wq->lockdep_map);
+       lock_map_release(&wq->lockdep_map);
        for_each_cpu_mask_nr(cpu, *cpu_map)
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * Returns false if @work has already terminated.
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work not to be requeued; otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+       struct cpu_workqueue_struct *cwq;
+       struct list_head *prev;
+       struct wq_barrier barr;
+
+       might_sleep();
+       cwq = get_wq_data(work);
+       if (!cwq)
+               return 0;
+
+       lock_map_acquire(&cwq->wq->lockdep_map);
+       lock_map_release(&cwq->wq->lockdep_map);
+
+       prev = NULL;
+       spin_lock_irq(&cwq->lock);
+       if (!list_empty(&work->entry)) {
+               /*
+                * See the comment near try_to_grab_pending()->smp_rmb().
+                * If it was re-queued under us we are not going to wait.
+                */
+               smp_rmb();
+               if (unlikely(cwq != get_wq_data(work)))
+                       goto out;
+               prev = &work->entry;
+       } else {
+               if (cwq->current_work != work)
+                       goto out;
+               prev = &cwq->worklist;
+       }
+       insert_wq_barrier(cwq, &barr, prev->next);
+out:
+       spin_unlock_irq(&cwq->lock);
+       if (!prev)
+               return 0;
+
+       wait_for_completion(&barr.done);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
 /*
  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
  * so this work can't be re-armed in any way.
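
The intended flush_work() usage pattern: the caller first prevents requeueing, then waits for the one pending or running instance. A minimal sketch, with ctx, its flags field, and MYDRV_STOPPING all hypothetical:

	set_bit(MYDRV_STOPPING, &ctx->flags);	/* callback tests this, stops requeueing */
	if (!flush_work(&ctx->work))
		pr_debug("work had already terminated\n");
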
@@ -473,7 +519,7 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 
        spin_lock_irq(&cwq->lock);
        if (unlikely(cwq->current_work == work)) {
-               insert_wq_barrier(cwq, &barr, 0);
+               insert_wq_barrier(cwq, &barr, cwq->worklist.next);
                running = 1;
        }
        spin_unlock_irq(&cwq->lock);
@@ -491,8 +537,8 @@ static void wait_on_work(struct work_struct *work)
 
        might_sleep();
 
-       lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
-       lock_release(&work->lockdep_map, 1, _THIS_IP_);
+       lock_map_acquire(&work->lockdep_map);
+       lock_map_release(&work->lockdep_map);
 
        cwq = get_wq_data(work);
        if (!cwq)
@@ -644,10 +690,10 @@ int schedule_on_each_cpu(work_func_t func)
                struct work_struct *work = per_cpu_ptr(works, cpu);
 
                INIT_WORK(work, func);
-               set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
-               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
+               schedule_work_on(cpu, work);
        }
-       flush_workqueue(keventd_wq);
+       for_each_online_cpu(cpu)
+               flush_work(per_cpu_ptr(works, cpu));
        put_online_cpus();
        free_percpu(works);
        return 0;
@@ -721,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
+       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        struct workqueue_struct *wq = cwq->wq;
        const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
        struct task_struct *p;
@@ -736,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
         */
        if (IS_ERR(p))
                return PTR_ERR(p);
-
+       if (cwq->wq->rt)
+               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
        cwq->thread = p;
 
        return 0;
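
With wq->rt set, each worker thread is promoted to SCHED_FIFO at priority MAX_RT_PRIO-1 via sched_setscheduler_nocheck(). A hedged sketch of creating and using such a queue, assuming the create_rt_workqueue() wrapper that accompanies this flag in linux/workqueue.h ("krtwqd" and urgent_work are hypothetical):

	struct workqueue_struct *rtwq = create_rt_workqueue("krtwqd");

	if (rtwq)
		queue_work(rtwq, &urgent_work);	/* serviced by an RT thread */
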
@@ -756,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 struct workqueue_struct *__create_workqueue_key(const char *name,
                                                int singlethread,
                                                int freezeable,
+                                               int rt,
                                                struct lock_class_key *key,
                                                const char *lock_name)
 {
@@ -777,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
        lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
        wq->singlethread = singlethread;
        wq->freezeable = freezeable;
+       wq->rt = rt;
        INIT_LIST_HEAD(&wq->list);
 
        if (singlethread) {
@@ -784,11 +834,22 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
                err = create_workqueue_thread(cwq, singlethread_cpu);
                start_workqueue_thread(cwq, -1);
        } else {
-               get_online_cpus();
+               cpu_maps_update_begin();
+               /*
+                * We must place this wq on the list even if the code
+                * below fails: cpu_down(cpu) can remove the cpu from
+                * cpu_populated_map before destroy_workqueue() takes
+                * the lock, in which case we would leak
+                * cwq[cpu]->thread.
+                */
                spin_lock(&workqueue_lock);
                list_add(&wq->list, &workqueues);
                spin_unlock(&workqueue_lock);
-
+               /*
+                * We must initialize the cwqs for each possible cpu
+                * even if we end up calling destroy_workqueue().
+                * Otherwise cpu_up() can hit an uninitialized cwq once
+                * we drop the lock.
+                */
                for_each_possible_cpu(cpu) {
                        cwq = init_cpu_workqueue(wq, cpu);
                        if (err || !cpu_online(cpu))
@@ -796,7 +857,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
                        err = create_workqueue_thread(cwq, cpu);
                        start_workqueue_thread(cwq, cpu);
                }
-               put_online_cpus();
+               cpu_maps_update_done();
        }
 
        if (err) {
@@ -810,18 +871,18 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
        /*
-        * Our caller is either destroy_workqueue() or CPU_DEAD,
-        * get_online_cpus() protects cwq->thread.
+        * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+        * cpu_add_remove_lock protects cwq->thread.
         */
        if (cwq->thread == NULL)
                return;
 
-       lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
-       lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+       lock_map_acquire(&cwq->wq->lockdep_map);
+       lock_map_release(&cwq->wq->lockdep_map);
 
        flush_cpu_workqueue(cwq);
        /*
-        * If the caller is CPU_DEAD and cwq->worklist was not empty,
+        * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
         * a concurrent flush_workqueue() can insert a barrier after us.
         * However, in that case run_workqueue() won't return and check
         * kthread_should_stop() until it flushes all work_struct's.
@@ -845,14 +906,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
        const cpumask_t *cpu_map = wq_cpu_map(wq);
        int cpu;
 
-       get_online_cpus();
+       cpu_maps_update_begin();
        spin_lock(&workqueue_lock);
        list_del(&wq->list);
        spin_unlock(&workqueue_lock);
 
        for_each_cpu_mask_nr(cpu, *cpu_map)
                cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
-       put_online_cpus();
+       cpu_maps_update_done();
 
        free_percpu(wq->cpu_wq);
        kfree(wq);
@@ -866,6 +927,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
        unsigned int cpu = (unsigned long)hcpu;
        struct cpu_workqueue_struct *cwq;
        struct workqueue_struct *wq;
+       int ret = NOTIFY_OK;
 
        action &= ~CPU_TASKS_FROZEN;
 
@@ -873,7 +935,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
        case CPU_UP_PREPARE:
                cpu_set(cpu, cpu_populated_map);
        }
-
+undo:
        list_for_each_entry(wq, &workqueues, list) {
                cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 
@@ -883,7 +945,9 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
                                break;
                        printk(KERN_ERR "workqueue [%s] for %i failed\n",
                                wq->name, cpu);
-                       return NOTIFY_BAD;
+                       action = CPU_UP_CANCELED;
+                       ret = NOTIFY_BAD;
+                       goto undo;
 
                case CPU_ONLINE:
                        start_workqueue_thread(cwq, cpu);
@@ -891,7 +955,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
                case CPU_UP_CANCELED:
                        start_workqueue_thread(cwq, -1);
-               case CPU_DEAD:
+               case CPU_POST_DEAD:
                        cleanup_workqueue_thread(cwq);
                        break;
                }
@@ -899,12 +963,57 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
        switch (action) {
        case CPU_UP_CANCELED:
-       case CPU_DEAD:
+       case CPU_POST_DEAD:
                cpu_clear(cpu, cpu_populated_map);
        }
 
-       return NOTIFY_OK;
+       return ret;
+}
+
+#ifdef CONFIG_SMP
+struct work_for_cpu {
+       struct work_struct work;
+       long (*fn)(void *);
+       void *arg;
+       long ret;
+};
+
+static void do_work_for_cpu(struct work_struct *w)
+{
+       struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
+
+       wfc->ret = wfc->fn(wfc->arg);
+}
+
+/**
+ * work_on_cpu - run a function in user context on a particular cpu
+ * @cpu: the cpu to run on
+ * @fn: the function to run
+ * @arg: the function arg
+ *
+ * This will return -EINVAL if the cpu is not online, or the return value
+ * of @fn otherwise.
+ */
+long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+{
+       struct work_for_cpu wfc;
+
+       INIT_WORK(&wfc.work, do_work_for_cpu);
+       wfc.fn = fn;
+       wfc.arg = arg;
+       get_online_cpus();
+       if (unlikely(!cpu_online(cpu)))
+               wfc.ret = -EINVAL;
+       else {
+               schedule_work_on(cpu, &wfc.work);
+               flush_work(&wfc.work);
+       }
+       put_online_cpus();
+
+       return wfc.ret;
 }
+EXPORT_SYMBOL_GPL(work_on_cpu);
+#endif /* CONFIG_SMP */
 
 void __init init_workqueues(void)
 {