/* mm/pdflush.c — from the linux-2.6 tree (safe/jmp mirror) */
1 /*
2  * mm/pdflush.c - worker threads for writing back filesystem data
3  *
4  * Copyright (C) 2002, Linus Torvalds.
5  *
6  * 09Apr2002    Andrew Morton
7  *              Initial version
8  * 29Feb2004    kaos@sgi.com
9  *              Move worker thread creation to kthread to avoid chewing
10  *              up stack space with nested calls to kernel_thread.
11  */
12
13 #include <linux/sched.h>
14 #include <linux/list.h>
15 #include <linux/signal.h>
16 #include <linux/spinlock.h>
17 #include <linux/gfp.h>
18 #include <linux/init.h>
19 #include <linux/module.h>
20 #include <linux/fs.h>           /* Needed by writeback.h          */
21 #include <linux/writeback.h>    /* Prototypes pdflush_operation() */
22 #include <linux/kthread.h>
23 #include <linux/cpuset.h>
24 #include <linux/freezer.h>
25
26
27 /*
28  * Minimum and maximum number of pdflush instances
29  */
30 #define MIN_PDFLUSH_THREADS     2
31 #define MAX_PDFLUSH_THREADS     8
32
33 static void start_one_pdflush_thread(void);
34
35
36 /*
37  * The pdflush threads are worker threads for writing back dirty data.
38  * Ideally, we'd like one thread per active disk spindle.  But the disk
39  * topology is very hard to divine at this level.   Instead, we take
40  * care in various places to prevent more than one pdflush thread from
41  * performing writeback against a single filesystem.  pdflush threads
42  * have the PF_FLUSHER flag set in current->flags to aid in this.
43  */
44
45 /*
46  * All the pdflush threads.  Protected by pdflush_lock
47  */
48 static LIST_HEAD(pdflush_list);
49 static DEFINE_SPINLOCK(pdflush_lock);
50
51 /*
52  * The count of currently-running pdflush threads.  Protected
53  * by pdflush_lock.
54  *
55  * Readable by sysctl, but not writable.  Published to userspace at
56  * /proc/sys/vm/nr_pdflush_threads.
57  */
58 int nr_pdflush_threads = 0;
59
60 /*
61  * The max/min number of pdflush threads. R/W by sysctl at
62  * /proc/sys/vm/nr_pdflush_threads_max/min
63  */
64 int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS;
65 int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS;
66
67
68 /*
69  * The time at which the pdflush thread pool last went empty
70  */
71 static unsigned long last_empty_jifs;
72
73 /*
74  * The pdflush thread.
75  *
76  * Thread pool management algorithm:
77  * 
78  * - The minimum and maximum number of pdflush instances are bound
79  *   by nr_pdflush_threads_min and nr_pdflush_threads_max.
80  * 
81  * - If there have been no idle pdflush instances for 1 second, create
82  *   a new one.
83  * 
84  * - If the least-recently-went-to-sleep pdflush thread has been asleep
85  *   for more than one second, terminate a thread.
86  */
87
/*
 * A structure for passing work to a pdflush thread.  Also for passing
 * state information between pdflush threads.  Protected by pdflush_lock.
 * Each pdflush thread owns exactly one of these, allocated on its own
 * stack in pdflush() and linked onto pdflush_list while the thread idles.
 */
struct pdflush_work {
	struct task_struct *who;	/* The thread */
	void (*fn)(unsigned long);	/* A callback function */
	unsigned long arg0;		/* An argument to the callback */
	struct list_head list;		/* On pdflush_list, when idle */
	unsigned long when_i_went_to_sleep;	/* jiffies at last park, for idle-timeout reaping */
};
99
/*
 * Main loop of a single pdflush thread.
 *
 * The thread parks itself on pdflush_list and sleeps until
 * pdflush_operation() removes it from the list and hands it a callback
 * via my_work->fn.  After running the callback it re-queues itself.
 * The loop also implements the pool-sizing policy described above:
 * spawn a new thread when the pool has been empty for over a second,
 * and exit when the longest-sleeping thread has idled for over a second.
 *
 * Returns 0 when this thread elects to leave the pool.
 */
static int __pdflush(struct pdflush_work *my_work)
{
	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
	set_freezable();
	my_work->fn = NULL;
	my_work->who = current;
	INIT_LIST_HEAD(&my_work->list);

	spin_lock_irq(&pdflush_lock);
	for ( ; ; ) {
		struct pdflush_work *pdf;

		/*
		 * Set the task state before re-queueing: a wakeup from
		 * pdflush_operation() between list_move() and schedule()
		 * must not be lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		list_move(&my_work->list, &pdflush_list);
		my_work->when_i_went_to_sleep = jiffies;
		spin_unlock_irq(&pdflush_lock);
		schedule();
		try_to_freeze();
		spin_lock_irq(&pdflush_lock);
		if (!list_empty(&my_work->list)) {
			/*
			 * Someone woke us up, but without removing our control
			 * structure from the global list.  swsusp will do this
			 * in try_to_freeze()->refrigerator().  Handle it.
			 */
			my_work->fn = NULL;
			continue;
		}
		if (my_work->fn == NULL) {
			/* Dequeued but handed no work - shouldn't happen. */
			printk("pdflush: bogus wakeup\n");
			continue;
		}
		/* Run the callback without holding pdflush_lock. */
		spin_unlock_irq(&pdflush_lock);

		(*my_work->fn)(my_work->arg0);

		spin_lock_irq(&pdflush_lock);

		/*
		 * Thread creation: For how long have there been zero
		 * available threads?
		 *
		 * To throttle creation, we reset last_empty_jifs.
		 */
		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
			if (list_empty(&pdflush_list) &&
			    nr_pdflush_threads < nr_pdflush_threads_max) {
				last_empty_jifs = jiffies;
				nr_pdflush_threads++;
				/* Drop the lock: thread creation may sleep. */
				spin_unlock_irq(&pdflush_lock);
				start_one_pdflush_thread();
				spin_lock_irq(&pdflush_lock);
			}
		}

		my_work->fn = NULL;

		/*
		 * Thread destruction: For how long has the sleepiest
		 * thread slept?
		 */
		if (list_empty(&pdflush_list))
			continue;
		if (nr_pdflush_threads <= nr_pdflush_threads_min)
			continue;
		/* The list tail is the thread that has slept the longest. */
		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
			/* Limit exit rate */
			pdf->when_i_went_to_sleep = jiffies;
			break;					/* exeunt */
		}
	}
	nr_pdflush_threads--;
	spin_unlock_irq(&pdflush_lock);
	return 0;
}
176
177 /*
178  * Of course, my_work wants to be just a local in __pdflush().  It is
179  * separated out in this manner to hopefully prevent the compiler from
180  * performing unfortunate optimisations against the auto variables.  Because
181  * these are visible to other tasks and CPUs.  (No problem has actually
182  * been observed.  This is just paranoia).
183  */
184 static int pdflush(void *dummy)
185 {
186         struct pdflush_work my_work;
187         cpumask_var_t cpus_allowed;
188
189         /*
190          * Since the caller doesn't even check kthread_run() worked, let's not
191          * freak out too much if this fails.
192          */
193         if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
194                 printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
195                 return 0;
196         }
197
198         /*
199          * pdflush can spend a lot of time doing encryption via dm-crypt.  We
200          * don't want to do that at keventd's priority.
201          */
202         set_user_nice(current, 0);
203
204         /*
205          * Some configs put our parent kthread in a limited cpuset,
206          * which kthread() overrides, forcing cpus_allowed == cpu_all_mask.
207          * Our needs are more modest - cut back to our cpusets cpus_allowed.
208          * This is needed as pdflush's are dynamically created and destroyed.
209          * The boottime pdflush's are easily placed w/o these 2 lines.
210          */
211         cpuset_cpus_allowed(current, cpus_allowed);
212         set_cpus_allowed_ptr(current, cpus_allowed);
213         free_cpumask_var(cpus_allowed);
214
215         return __pdflush(&my_work);
216 }
217
218 /*
219  * Attempt to wake up a pdflush thread, and get it to do some work for you.
220  * Returns zero if it indeed managed to find a worker thread, and passed your
221  * payload to it.
222  */
223 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
224 {
225         unsigned long flags;
226         int ret = 0;
227
228         BUG_ON(fn == NULL);     /* Hard to diagnose if it's deferred */
229
230         spin_lock_irqsave(&pdflush_lock, flags);
231         if (list_empty(&pdflush_list)) {
232                 ret = -1;
233         } else {
234                 struct pdflush_work *pdf;
235
236                 pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
237                 list_del_init(&pdf->list);
238                 if (list_empty(&pdflush_list))
239                         last_empty_jifs = jiffies;
240                 pdf->fn = fn;
241                 pdf->arg0 = arg0;
242                 wake_up_process(pdf->who);
243         }
244         spin_unlock_irqrestore(&pdflush_lock, flags);
245
246         return ret;
247 }
248
249 static void start_one_pdflush_thread(void)
250 {
251         struct task_struct *k;
252
253         k = kthread_run(pdflush, NULL, "pdflush");
254         if (unlikely(IS_ERR(k))) {
255                 spin_lock_irq(&pdflush_lock);
256                 nr_pdflush_threads--;
257                 spin_unlock_irq(&pdflush_lock);
258         }
259 }
260
261 static int __init pdflush_init(void)
262 {
263         int i;
264
265         /*
266          * Pre-set nr_pdflush_threads...  If we fail to create,
267          * the count will be decremented.
268          */
269         nr_pdflush_threads = nr_pdflush_threads_min;
270
271         for (i = 0; i < nr_pdflush_threads_min; i++)
272                 start_one_pdflush_thread();
273         return 0;
274 }
275
276 module_init(pdflush_init);