cgroups: make cftype.unregister_event() void-returning
[safe/jmp/linux-2.6] / mm / backing-dev.c
index 22c45e9..660a87a 100644 (file)
 #include <linux/writeback.h>
 #include <linux/device.h>
 
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
 EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
+       .name           = "default",
        .ra_pages       = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
        .state          = 0,
        .capabilities   = BDI_CAP_MAP_COPY,
@@ -24,7 +27,18 @@ struct backing_dev_info default_backing_dev_info = {
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
+struct backing_dev_info noop_backing_dev_info = {
+       .name           = "noop",
+};
+EXPORT_SYMBOL_GPL(noop_backing_dev_info);
+
 static struct class *bdi_class;
+
+/*
+ * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
+ * reader side protection for bdi_pending_list. bdi_list has RCU reader side
+ * locking.
+ */
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 LIST_HEAD(bdi_pending_list);
@@ -34,7 +48,6 @@ static struct timer_list sync_supers_timer;
 
 static int bdi_sync_supers(void *);
 static void sync_supers_timer_fn(unsigned long);
-static void arm_supers_timer(void);
 
 static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
 
@@ -85,7 +98,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   "BdiDirtyThresh:   %8lu kB\n"
                   "DirtyThresh:      %8lu kB\n"
                   "BackgroundThresh: %8lu kB\n"
-                  "WriteBack threads:%8lu\n"
+                  "WritebackThreads: %8lu\n"
                   "b_dirty:          %8lu\n"
                   "b_io:             %8lu\n"
                   "b_more_io:        %8lu\n"
@@ -220,6 +233,9 @@ static struct device_attribute bdi_dev_attrs[] = {
 static __init int bdi_class_init(void)
 {
        bdi_class = class_create(THIS_MODULE, "bdi");
+       if (IS_ERR(bdi_class))
+               return PTR_ERR(bdi_class);
+
        bdi_class->dev_attrs = bdi_dev_attrs;
        bdi_debug_init();
        return 0;
@@ -235,7 +251,7 @@ static int __init default_bdi_init(void)
 
        init_timer(&sync_supers_timer);
        setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
-       arm_supers_timer();
+       bdi_arm_supers_timer();
 
        err = bdi_init(&default_backing_dev_info);
        if (!err)
@@ -283,9 +299,9 @@ static int bdi_start_fn(void *ptr)
        /*
         * Add us to the active bdi_list
         */
-       spin_lock(&bdi_lock);
-       list_add(&bdi->bdi_list, &bdi_list);
-       spin_unlock(&bdi_lock);
+       spin_lock_bh(&bdi_lock);
+       list_add_rcu(&bdi->bdi_list, &bdi_list);
+       spin_unlock_bh(&bdi_lock);
 
        bdi_task_init(bdi, wb);
 
@@ -357,10 +373,13 @@ static int bdi_sync_supers(void *unused)
        return 0;
 }
 
-static void arm_supers_timer(void)
+void bdi_arm_supers_timer(void)
 {
        unsigned long next;
 
+       if (!dirty_writeback_interval)
+               return;
+
        next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
        mod_timer(&sync_supers_timer, round_jiffies_up(next));
 }
@@ -368,7 +387,7 @@ static void arm_supers_timer(void)
 static void sync_supers_timer_fn(unsigned long unused)
 {
        wake_up_process(sync_supers_tsk);
-       arm_supers_timer();
+       bdi_arm_supers_timer();
 }
 
 static int bdi_forker_task(void *ptr)
@@ -388,7 +407,7 @@ static int bdi_forker_task(void *ptr)
                if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
                        wb_do_writeback(me, 0);
 
-               spin_lock(&bdi_lock);
+               spin_lock_bh(&bdi_lock);
 
                /*
                 * Check if any existing bdi's have dirty data without
@@ -409,9 +428,12 @@ static int bdi_forker_task(void *ptr)
                if (list_empty(&bdi_pending_list)) {
                        unsigned long wait;
 
-                       spin_unlock(&bdi_lock);
+                       spin_unlock_bh(&bdi_lock);
                        wait = msecs_to_jiffies(dirty_writeback_interval * 10);
-                       schedule_timeout(wait);
+                       if (wait)
+                               schedule_timeout(wait);
+                       else
+                               schedule();
                        try_to_freeze();
                        continue;
                }
@@ -425,7 +447,7 @@ static int bdi_forker_task(void *ptr)
                bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
                                 bdi_list);
                list_del_init(&bdi->bdi_list);
-               spin_unlock(&bdi_lock);
+               spin_unlock_bh(&bdi_lock);
 
                wb = &bdi->wb;
                wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
@@ -444,9 +466,9 @@ static int bdi_forker_task(void *ptr)
                         * a chance to flush other bdi's to free
                         * memory.
                         */
-                       spin_lock(&bdi_lock);
+                       spin_lock_bh(&bdi_lock);
                        list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-                       spin_unlock(&bdi_lock);
+                       spin_unlock_bh(&bdi_lock);
 
                        bdi_flush_io(bdi);
                }
@@ -455,6 +477,24 @@ static int bdi_forker_task(void *ptr)
        return 0;
 }
 
+static void bdi_add_to_pending(struct rcu_head *head)
+{
+       struct backing_dev_info *bdi;
+
+       bdi = container_of(head, struct backing_dev_info, rcu_head);
+       INIT_LIST_HEAD(&bdi->bdi_list);
+
+       spin_lock(&bdi_lock);
+       list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+       spin_unlock(&bdi_lock);
+
+       /*
+        * We are now on the pending list, wake up bdi_forker_task()
+        * to finish the job and add us back to the active bdi_list.
+        *
+        * Summary of this function: it is the callback that
+        * bdi_add_default_flusher_task() hands to call_rcu() after it has
+        * taken the bdi off bdi_list with list_del_rcu(). The bdi is
+        * recovered from its embedded rcu_head, its bdi_list node is
+        * re-initialized and queued at the tail of bdi_pending_list under
+        * bdi_lock, and the task woken below is the one stored in
+        * default_backing_dev_info.wb.task.
+        */
+       wake_up_process(default_backing_dev_info.wb.task);
+}
+
 /*
  * Add the default flusher task that gets created for any bdi
  * that has dirty data pending writeout
@@ -464,6 +504,12 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
        if (!bdi_cap_writeback_dirty(bdi))
                return;
 
+       if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
+               printk(KERN_ERR "bdi %p/%s is not registered!\n",
+                                                       bdi, bdi->name);
+               return;
+       }
+
        /*
         * Check with the helper whether to proceed adding a task. Will only
          * abort if we have two or more simultaneous calls to
@@ -471,16 +517,29 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
         * waiting for previous additions to finish.
         */
        if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-               list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+               list_del_rcu(&bdi->bdi_list);
 
                /*
-                * We are now on the pending list, wake up bdi_forker_task()
-                * to finish the job and add us back to the active bdi_list
+                * We must wait for the current RCU period to end before
+                * moving to the pending list. So schedule that operation
+                * from an RCU callback.
                 */
-               wake_up_process(default_backing_dev_info.wb.task);
+               call_rcu(&bdi->rcu_head, bdi_add_to_pending);
        }
 }
 
+/*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible.
+ *
+ * The entry is unlinked with list_del_rcu() while holding bdi_lock
+ * (taken with the _bh variant). Once the lock has been dropped,
+ * synchronize_rcu() is called before returning to the caller.
+ */
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+       spin_lock_bh(&bdi_lock);
+       list_del_rcu(&bdi->bdi_list);
+       spin_unlock_bh(&bdi_lock);
+
+       synchronize_rcu();
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                const char *fmt, ...)
 {
@@ -499,9 +558,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                goto exit;
        }
 
-       spin_lock(&bdi_lock);
-       list_add_tail(&bdi->bdi_list, &bdi_list);
-       spin_unlock(&bdi_lock);
+       spin_lock_bh(&bdi_lock);
+       list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+       spin_unlock_bh(&bdi_lock);
 
        bdi->dev = dev;
 
@@ -519,14 +578,13 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                        wb->task = NULL;
                        ret = -ENOMEM;
 
-                       spin_lock(&bdi_lock);
-                       list_del(&bdi->bdi_list);
-                       spin_unlock(&bdi_lock);
+                       bdi_remove_from_list(bdi);
                        goto exit;
                }
        }
 
        bdi_debug_register(bdi, dev_name(dev));
+       set_bit(BDI_registered, &bdi->state);
 exit:
        return ret;
 }
@@ -557,21 +615,40 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
        /*
         * Make sure nobody finds us on the bdi_list anymore
         */
-       spin_lock(&bdi_lock);
-       list_del(&bdi->bdi_list);
-       spin_unlock(&bdi_lock);
+       bdi_remove_from_list(bdi);
 
        /*
         * Finally, kill the kernel threads. We don't need to be RCU
-        * safe anymore, since the bdi is gone from visibility.
+        * safe anymore, since the bdi is gone from visibility. Force
+        * unfreeze of the thread before calling kthread_stop(), otherwise
+        * it would never exit if it is currently stuck in the refrigerator.
         */
-       list_for_each_entry(wb, &bdi->wb_list, list)
+       list_for_each_entry(wb, &bdi->wb_list, list) {
+               thaw_process(wb->task);
                kthread_stop(wb->task);
+       }
+}
+
+/*
+ * This bdi is going away now, make sure that no super_blocks point to it.
+ *
+ * Walks the super_blocks list with sb_lock held and resets s_bdi to NULL
+ * in every super_block whose s_bdi currently equals @bdi. Nothing else in
+ * the super_block is touched.
+ */
+static void bdi_prune_sb(struct backing_dev_info *bdi)
+{
+       struct super_block *sb;
+
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_bdi == bdi)
+                       sb->s_bdi = NULL;
+       }
+       spin_unlock(&sb_lock);
 }
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
        if (bdi->dev) {
+               bdi_prune_sb(bdi);
+
                if (!bdi_cap_flush_forker(bdi))
                        bdi_wb_shutdown(bdi);
                bdi_debug_unregister(bdi);
@@ -591,6 +668,7 @@ int bdi_init(struct backing_dev_info *bdi)
        bdi->max_ratio = 100;
        bdi->max_prop_frac = PROP_FRAC_BASE;
        spin_lock_init(&bdi->wb_lock);
+       INIT_RCU_HEAD(&bdi->rcu_head);
        INIT_LIST_HEAD(&bdi->bdi_list);
        INIT_LIST_HEAD(&bdi->wb_list);
        INIT_LIST_HEAD(&bdi->work_list);
@@ -626,7 +704,19 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
        int i;
 
-       WARN_ON(bdi_has_dirty_io(bdi));
+       /*
+        * Splice our entries to the default_backing_dev_info, if this
+        * bdi disappears
+        */
+       if (bdi_has_dirty_io(bdi)) {
+               struct bdi_writeback *dst = &default_backing_dev_info.wb;
+
+               spin_lock(&inode_lock);
+               list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
+               list_splice(&bdi->wb.b_io, &dst->b_io);
+               list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+               spin_unlock(&inode_lock);
+       }
 
        bdi_unregister(bdi);
 
@@ -637,6 +727,33 @@ void bdi_destroy(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_destroy);
 
+/*
+ * For use from filesystems to quickly init and register a bdi associated
+ * with dirty writeback.
+ *
+ * Sets bdi->name and bdi->capabilities from @name and @cap, runs
+ * bdi_init() and then registers the bdi (with no parent device) under the
+ * name "<name>-<seq>", where <name> is cut to at most 28 characters and
+ * <seq> is the next value of the bdi_seq counter.
+ *
+ * Returns 0 on success, or the error reported by bdi_init() or
+ * bdi_register(). If registration fails the bdi is torn down again with
+ * bdi_destroy() before returning.
+ */
+int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
+                          unsigned int cap)
+{
+       int err;
+
+       bdi->name = name;
+       bdi->capabilities = cap;
+       err = bdi_init(bdi);
+       if (err)
+               return err;
+
+       /*
+        * Hand the format straight to bdi_register() instead of pasting
+        * @name into a format string first: the sequence number comes
+        * from an atomic_long_t and therefore needs %ld, and a '%' in
+        * @name must not be parsed as a conversion.
+        */
+       err = bdi_register(bdi, NULL, "%.28s-%ld", name,
+                          atomic_long_inc_return(&bdi_seq));
+       if (err) {
+               bdi_destroy(bdi);
+               return err;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(bdi_setup_and_register);
+
 static wait_queue_head_t congestion_wqh[2] = {
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])