[PATCH] swsusp: rework memory freeing on resume
[safe/jmp/linux-2.6] / kernel / power / snapshot.c
/*
 * linux/kernel/power/snapshot.c
 *
 * This file implements the architecture-independent part of the machine
 * suspend feature, using almost exclusively high-level routines.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2, and is based on swsusp.c.
 *
 */
12
13
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/smp_lock.h>
18 #include <linux/file.h>
19 #include <linux/utsname.h>
20 #include <linux/version.h>
21 #include <linux/delay.h>
22 #include <linux/reboot.h>
23 #include <linux/bitops.h>
24 #include <linux/vt_kern.h>
25 #include <linux/kbd_kern.h>
26 #include <linux/keyboard.h>
27 #include <linux/spinlock.h>
28 #include <linux/genhd.h>
29 #include <linux/kernel.h>
30 #include <linux/major.h>
31 #include <linux/swap.h>
32 #include <linux/pm.h>
33 #include <linux/device.h>
34 #include <linux/buffer_head.h>
35 #include <linux/swapops.h>
36 #include <linux/bootmem.h>
37 #include <linux/syscalls.h>
38 #include <linux/console.h>
39 #include <linux/highmem.h>
40 #include <linux/bio.h>
41 #include <linux/mount.h>
42
43 #include <asm/uaccess.h>
44 #include <asm/mmu_context.h>
45 #include <asm/pgtable.h>
46 #include <asm/tlbflush.h>
47 #include <asm/io.h>
48
49 #include <linux/random.h>
50 #include <linux/crypto.h>
51 #include <asm/scatterlist.h>
52
53 #include "power.h"
54
55
56
57
58 #ifdef CONFIG_HIGHMEM
/*
 * One saved page: @data is the buffer holding the copy, @page is the page
 * the copy was taken from, @next links the entries into a singly linked list.
 */
struct highmem_page {
	char *data;
	struct page *page;
	struct highmem_page *next;
};

/* Head of the list of saved pages; new entries are pushed at the front. */
static struct highmem_page *highmem_copy;
66
67 static int save_highmem_zone(struct zone *zone)
68 {
69         unsigned long zone_pfn;
70         mark_free_pages(zone);
71         for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
72                 struct page *page;
73                 struct highmem_page *save;
74                 void *kaddr;
75                 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
76
77                 if (!(pfn%1000))
78                         printk(".");
79                 if (!pfn_valid(pfn))
80                         continue;
81                 page = pfn_to_page(pfn);
82                 /*
83                  * This condition results from rvmalloc() sans vmalloc_32()
84                  * and architectural memory reservations. This should be
85                  * corrected eventually when the cases giving rise to this
86                  * are better understood.
87                  */
88                 if (PageReserved(page)) {
89                         printk("highmem reserved page?!\n");
90                         continue;
91                 }
92                 BUG_ON(PageNosave(page));
93                 if (PageNosaveFree(page))
94                         continue;
95                 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
96                 if (!save)
97                         return -ENOMEM;
98                 save->next = highmem_copy;
99                 save->page = page;
100                 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
101                 if (!save->data) {
102                         kfree(save);
103                         return -ENOMEM;
104                 }
105                 kaddr = kmap_atomic(page, KM_USER0);
106                 memcpy(save->data, kaddr, PAGE_SIZE);
107                 kunmap_atomic(kaddr, KM_USER0);
108                 highmem_copy = save;
109         }
110         return 0;
111 }
112 #endif /* CONFIG_HIGHMEM */
113
114
/*
 * Run save_highmem_zone() on every highmem zone, stopping at the first
 * failure and returning its error code.  Returns 0 when all zones were
 * saved, and always 0 when CONFIG_HIGHMEM is not set.
 */
static int save_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct zone *zone;

	pr_debug("swsusp: Saving Highmem\n");
	for_each_zone (zone) {
		int err;

		if (!is_highmem(zone))
			continue;
		err = save_highmem_zone(zone);
		if (err)
			return err;
	}
#endif
	return 0;
}
131
/*
 * Pop every entry off the highmem_copy list, copy its saved data back into
 * the page it was taken from, and free both the data page and the entry.
 * The list is empty afterwards.  Always returns 0.
 */
int restore_highmem(void)
{
#ifdef CONFIG_HIGHMEM
	struct highmem_page *entry;

	printk("swsusp: Restoring Highmem\n");
	while ((entry = highmem_copy) != NULL) {
		void *dst;

		/* Unlink first so the list head never points at freed memory. */
		highmem_copy = entry->next;

		dst = kmap_atomic(entry->page, KM_USER0);
		memcpy(dst, entry->data, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);

		free_page((unsigned long)entry->data);
		kfree(entry);
	}
#endif
	return 0;
}
150
151
152 static int pfn_is_nosave(unsigned long pfn)
153 {
154         unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
155         unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
156         return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
157 }
158
159 /**
160  *      saveable - Determine whether a page should be cloned or not.
161  *      @pfn:   The page
162  *
163  *      We save a page if it's Reserved, and not in the range of pages
164  *      statically defined as 'unsaveable', or if it isn't reserved, and
165  *      isn't part of a free chunk of pages.
166  */
167
168 static int saveable(struct zone * zone, unsigned long * zone_pfn)
169 {
170         unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
171         struct page * page;
172
173         if (!pfn_valid(pfn))
174                 return 0;
175
176         page = pfn_to_page(pfn);
177         BUG_ON(PageReserved(page) && PageNosave(page));
178         if (PageNosave(page))
179                 return 0;
180         if (PageReserved(page) && pfn_is_nosave(pfn)) {
181                 pr_debug("[nosave pfn 0x%lx]", pfn);
182                 return 0;
183         }
184         if (PageNosaveFree(page))
185                 return 0;
186
187         return 1;
188 }
189
190 static unsigned count_data_pages(void)
191 {
192         struct zone *zone;
193         unsigned long zone_pfn;
194         unsigned n;
195
196         n = 0;
197         for_each_zone (zone) {
198                 if (is_highmem(zone))
199                         continue;
200                 mark_free_pages(zone);
201                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
202                         n += saveable(zone, &zone_pfn);
203         }
204         return n;
205 }
206
207 static void copy_data_pages(struct pbe *pblist)
208 {
209         struct zone *zone;
210         unsigned long zone_pfn;
211         struct pbe *pbe, *p;
212
213         pbe = pblist;
214         for_each_zone (zone) {
215                 if (is_highmem(zone))
216                         continue;
217                 mark_free_pages(zone);
218                 /* This is necessary for swsusp_free() */
219                 for_each_pb_page (p, pblist)
220                         SetPageNosaveFree(virt_to_page(p));
221                 for_each_pbe (p, pblist)
222                         SetPageNosaveFree(virt_to_page(p->address));
223                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
224                         if (saveable(zone, &zone_pfn)) {
225                                 struct page * page;
226                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
227                                 BUG_ON(!pbe);
228                                 pbe->orig_address = (unsigned long)page_address(page);
229                                 /* copy_page is not usable for copying task structs. */
230                                 memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
231                                 pbe = pbe->next;
232                         }
233                 }
234         }
235         BUG_ON(pbe);
236 }
237
238
239 /**
240  *      free_pagedir - free pages allocated with alloc_pagedir()
241  */
242
243 static void free_pagedir(struct pbe *pblist)
244 {
245         struct pbe *pbe;
246
247         while (pblist) {
248                 pbe = (pblist + PB_PAGE_SKIP)->next;
249                 ClearPageNosave(virt_to_page(pblist));
250                 ClearPageNosaveFree(virt_to_page(pblist));
251                 free_page((unsigned long)pblist);
252                 pblist = pbe;
253         }
254 }
255
256 /**
257  *      fill_pb_page - Create a list of PBEs on a given memory page
258  */
259
260 static inline void fill_pb_page(struct pbe *pbpage)
261 {
262         struct pbe *p;
263
264         p = pbpage;
265         pbpage += PB_PAGE_SKIP;
266         do
267                 p->next = p + 1;
268         while (++p < pbpage);
269 }
270
271 /**
272  *      create_pbe_list - Create a list of PBEs on top of a given chain
273  *      of memory pages allocated with alloc_pagedir()
274  */
275
276 void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
277 {
278         struct pbe *pbpage, *p;
279         unsigned num = PBES_PER_PAGE;
280
281         for_each_pb_page (pbpage, pblist) {
282                 if (num >= nr_pages)
283                         break;
284
285                 fill_pb_page(pbpage);
286                 num += PBES_PER_PAGE;
287         }
288         if (pbpage) {
289                 for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
290                         p->next = p + 1;
291                 p->next = NULL;
292         }
293         pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
294 }
295
296 static void *alloc_image_page(void)
297 {
298         void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
299         if (res) {
300                 SetPageNosave(virt_to_page(res));
301                 SetPageNosaveFree(virt_to_page(res));
302         }
303         return res;
304 }
305
306 /**
307  *      alloc_pagedir - Allocate the page directory.
308  *
309  *      First, determine exactly how many pages we need and
310  *      allocate them.
311  *
312  *      We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
313  *      struct pbe elements (pbes) and the last element in the page points
314  *      to the next page.
315  *
316  *      On each page we set up a list of struct_pbe elements.
317  */
318
319 struct pbe * alloc_pagedir(unsigned nr_pages)
320 {
321         unsigned num;
322         struct pbe *pblist, *pbe;
323
324         if (!nr_pages)
325                 return NULL;
326
327         pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
328         pblist = (struct pbe *)alloc_image_page();
329         /* FIXME: rewrite this ugly loop */
330         for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
331                         pbe = pbe->next, num += PBES_PER_PAGE) {
332                 pbe += PB_PAGE_SKIP;
333                 pbe->next = (struct pbe *)alloc_image_page();
334         }
335         if (!pbe) { /* get_zeroed_page() failed */
336                 free_pagedir(pblist);
337                 pblist = NULL;
338         }
339         return pblist;
340 }
341
342 /**
343  * Free pages we allocated for suspend. Suspend pages are alocated
344  * before atomic copy, so we need to free them after resume.
345  */
346
347 void swsusp_free(void)
348 {
349         struct zone *zone;
350         unsigned long zone_pfn;
351
352         for_each_zone(zone) {
353                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
354                         if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
355                                 struct page * page;
356                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
357                                 if (PageNosave(page) && PageNosaveFree(page)) {
358                                         ClearPageNosave(page);
359                                         ClearPageNosaveFree(page);
360                                         free_page((long) page_address(page));
361                                 }
362                         }
363         }
364 }
365
366
367 /**
368  *      enough_free_mem - Make sure we enough free memory to snapshot.
369  *
370  *      Returns TRUE or FALSE after checking the number of available
371  *      free pages.
372  */
373
374 static int enough_free_mem(unsigned nr_pages)
375 {
376         pr_debug("swsusp: available memory: %u pages\n", nr_free_pages());
377         return nr_free_pages() > (nr_pages + PAGES_FOR_IO +
378                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
379 }
380
381
382 static struct pbe *swsusp_alloc(unsigned nr_pages)
383 {
384         struct pbe *pblist, *p;
385
386         if (!(pblist = alloc_pagedir(nr_pages))) {
387                 printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
388                 return NULL;
389         }
390         create_pbe_list(pblist, nr_pages);
391
392         for_each_pbe (p, pblist) {
393                 p->address = (unsigned long)alloc_image_page();
394                 if (!p->address) {
395                         printk(KERN_ERR "suspend: Allocating image pages failed.\n");
396                         swsusp_free();
397                         return NULL;
398                 }
399         }
400
401         return pblist;
402 }
403
404 static int suspend_prepare_image(void)
405 {
406         unsigned nr_pages;
407
408         pr_debug("swsusp: critical section: \n");
409         if (save_highmem()) {
410                 printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n");
411                 restore_highmem();
412                 return -ENOMEM;
413         }
414
415         drain_local_pages();
416         nr_pages = count_data_pages();
417         printk("swsusp: Need to copy %u pages\n", nr_pages);
418
419         pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
420                  nr_pages,
421                  (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
422                  PAGES_FOR_IO, nr_free_pages());
423
424         /* This is needed because of the fixed size of swsusp_info */
425         if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE)
426                 return -ENOSPC;
427
428         if (!enough_free_mem(nr_pages)) {
429                 printk(KERN_ERR "swsusp: Not enough free memory\n");
430                 return -ENOMEM;
431         }
432
433         if (!enough_swap(nr_pages)) {
434                 printk(KERN_ERR "swsusp: Not enough free swap\n");
435                 return -ENOSPC;
436         }
437
438         pagedir_nosave = swsusp_alloc(nr_pages);
439         if (!pagedir_nosave)
440                 return -ENOMEM;
441
442         /* During allocating of suspend pagedir, new cold pages may appear.
443          * Kill them.
444          */
445         drain_local_pages();
446         copy_data_pages(pagedir_nosave);
447
448         /*
449          * End of critical section. From now on, we can write to memory,
450          * but we should not touch disk. This specially means we must _not_
451          * touch swap space! Except we must write out our image of course.
452          */
453
454         nr_copy_pages = nr_pages;
455
456         printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
457         return 0;
458 }
459
460
461 asmlinkage int swsusp_save(void)
462 {
463         return suspend_prepare_image();
464 }