1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality for swsusp.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8  *
9  * This file is released under the GPLv2.
10  *
11  */
12
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
28 #include <linux/list.h>
29
30 #include <asm/uaccess.h>
31 #include <asm/mmu_context.h>
32 #include <asm/pgtable.h>
33 #include <asm/tlbflush.h>
34 #include <asm/io.h>
35
36 #include "power.h"
37
38 static int swsusp_page_is_free(struct page *);
39 static void swsusp_set_page_forbidden(struct page *);
40 static void swsusp_unset_page_forbidden(struct page *);
41
42 /* List of PBEs needed for restoring the pages that were allocated before
43  * the suspend and included in the suspend image, but have also been
44  * allocated by the "resume" kernel, so their contents cannot be written
45  * directly to their "original" page frames.
46  */
47 struct pbe *restore_pblist;
48
49 /* Pointer to an auxiliary buffer (1 page) */
50 static void *buffer;
51
52 /**
53  *      @safe_needed - on resume, for storing the PBE list and the image,
54  *      we can only use memory pages that do not conflict with the pages
55  *      used before suspend.  The unsafe pages are marked by means of
56  *      swsusp_set_page_forbidden() and counted in allocated_unsafe_pages.
57  *
58  *      Each allocated image page is marked as forbidden and free so that
59  *      swsusp_free() can release it.
60  */
61
62 #define PG_ANY          0
63 #define PG_SAFE         1
64 #define PG_UNSAFE_CLEAR 1
65 #define PG_UNSAFE_KEEP  0
66
67 static unsigned int allocated_unsafe_pages;
68
69 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
70 {
71         void *res;
72
73         res = (void *)get_zeroed_page(gfp_mask);
74         if (safe_needed)
75                 while (res && swsusp_page_is_free(virt_to_page(res))) {
76                         /* The page is unsafe, mark it for swsusp_free() */
77                         swsusp_set_page_forbidden(virt_to_page(res));
78                         allocated_unsafe_pages++;
79                         res = (void *)get_zeroed_page(gfp_mask);
80                 }
81         if (res) {
82                 swsusp_set_page_forbidden(virt_to_page(res));
83                 swsusp_set_page_free(virt_to_page(res));
84         }
85         return res;
86 }
87
88 unsigned long get_safe_page(gfp_t gfp_mask)
89 {
90         return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
91 }
92
93 static struct page *alloc_image_page(gfp_t gfp_mask)
94 {
95         struct page *page;
96
97         page = alloc_page(gfp_mask);
98         if (page) {
99                 swsusp_set_page_forbidden(page);
100                 swsusp_set_page_free(page);
101         }
102         return page;
103 }
104
105 /**
106  *      free_image_page - free page represented by @addr, allocated with
107  *      get_image_page (page flags set by it must be cleared)
108  */
109
110 static inline void free_image_page(void *addr, int clear_nosave_free)
111 {
112         struct page *page;
113
114         BUG_ON(!virt_addr_valid(addr));
115
116         page = virt_to_page(addr);
117
118         swsusp_unset_page_forbidden(page);
119         if (clear_nosave_free)
120                 swsusp_unset_page_free(page);
121
122         __free_page(page);
123 }
124
125 /* struct linked_page is used to build chains of pages */
126
127 #define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
128
129 struct linked_page {
130         struct linked_page *next;
131         char data[LINKED_PAGE_DATA_SIZE];
132 } __attribute__((packed));
133
134 static inline void
135 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
136 {
137         while (list) {
138                 struct linked_page *lp = list->next;
139
140                 free_image_page(list, clear_page_nosave);
141                 list = lp;
142         }
143 }
144
145 /**
146   *     struct chain_allocator is used for allocating small objects out of
147   *     a linked list of pages called 'the chain'.
148   *
149   *     The chain grows whenever there is no room for a new object in
150   *     the current page.  The allocated objects cannot be freed individually.
151   *     It is only possible to free them all at once, by freeing the entire
152   *     chain.
153   *
154   *     NOTE: The chain allocator may be inefficient if the allocated objects
155   *     are not much smaller than PAGE_SIZE.
156   */
157
158 struct chain_allocator {
159         struct linked_page *chain;      /* the chain */
160         unsigned int used_space;        /* total size of objects allocated out
161                                          * of the current page
162                                          */
163         gfp_t gfp_mask;         /* mask for allocating pages */
164         int safe_needed;        /* if set, only "safe" pages are allocated */
165 };
166
167 static void
168 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
169 {
170         ca->chain = NULL;
171         ca->used_space = LINKED_PAGE_DATA_SIZE;
172         ca->gfp_mask = gfp_mask;
173         ca->safe_needed = safe_needed;
174 }
175
176 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
177 {
178         void *ret;
179
180         if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
181                 struct linked_page *lp;
182
183                 lp = get_image_page(ca->gfp_mask, ca->safe_needed);
184                 if (!lp)
185                         return NULL;
186
187                 lp->next = ca->chain;
188                 ca->chain = lp;
189                 ca->used_space = 0;
190         }
191         ret = ca->chain->data + ca->used_space;
192         ca->used_space += size;
193         return ret;
194 }
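
/*
 * Illustrative sketch (not part of the original file): the intended use of
 * the chain allocator.  Small objects are carved out of the chain one by
 * one and released all at once by freeing the whole chain.  The function
 * and structure names below are hypothetical; create_bm_block_list() is
 * the real user of this allocator.
 */
static int __maybe_unused chain_alloc_example(gfp_t gfp_mask, unsigned int nr_objects)
{
	struct example_obj { unsigned long first_pfn, nr_pages; };
	struct chain_allocator ca;

	chain_init(&ca, gfp_mask, PG_ANY);
	while (nr_objects-- > 0) {
		struct example_obj *obj;

		obj = chain_alloc(&ca, sizeof(struct example_obj));
		if (!obj) {
			free_list_of_pages(ca.chain, PG_UNSAFE_CLEAR);
			return -ENOMEM;
		}
		obj->first_pfn = 0;
		obj->nr_pages = 0;
	}
	/* The objects cannot be freed individually, only with the chain. */
	free_list_of_pages(ca.chain, PG_UNSAFE_CLEAR);
	return 0;
}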
195
196 /**
197  *      Data types related to memory bitmaps.
198  *
199  *      Memory bitmap is a structure consisting of a linked list of
200  *      objects of type struct bm_block.  Each of these objects
201  *      corresponds to a contiguous range of page frames and contains
202  *      a pointer to a memory page in which one block of the bitmap
203  *      (covering up to BM_BITS_PER_BLOCK page frames) is stored.
204  *
205  *      struct memory_bitmap contains the list of bitmap block objects,
206  *      a struct bm_position used for browsing the bitmap, and a pointer
207  *      to the list of pages used for allocating the bitmap block
208  *      objects (the blocks themselves are carved out of those pages
209  *      with the help of the chain allocator defined above).
210  *
211  *      NOTE: It has to be possible to lay out the bitmap in memory
212  *      using only allocations of order 0.  Additionally, the bitmap is
213  *      designed to work with an arbitrary number of zones (this is over
214  *      the top for now, but let's avoid making unnecessary assumptions ;-).
215  *
216  *      struct bm_position stores the most recently used bitmap block
217  *      and the next bit position within it, so that consecutive lookups
218  *      of neighbouring pfns do not have to traverse the whole list of
219  *      blocks from the beginning.
220  *
221  *      struct bm_block contains a pointer to the memory page in which
222  *      information is stored (in the form of a block of the bitmap), as
223  *      well as the pfns corresponding to the start and end of that area.
224  */
225
226 #define BM_END_OF_MAP   (~0UL)
227
228 #define BM_BITS_PER_BLOCK       (PAGE_SIZE << 3)
229
230 struct bm_block {
231         struct list_head hook;  /* hook into a list of bitmap blocks */
232         unsigned long start_pfn;        /* pfn represented by the first bit */
233         unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
234         unsigned long *data;    /* bitmap representing pages */
235 };
236
237 static inline unsigned long bm_block_bits(struct bm_block *bb)
238 {
239         return bb->end_pfn - bb->start_pfn;
240 }
241
242 /* struct bm_position is used for browsing memory bitmaps */
243
244 struct bm_position {
245         struct bm_block *block;
246         int bit;
247 };
248
249 struct memory_bitmap {
250         struct list_head blocks;        /* list of bitmap blocks */
251         struct linked_page *p_list;     /* list of pages used to store zone
252                                          * bitmap objects and bitmap block
253                                          * objects
254                                          */
255         struct bm_position cur; /* most recently used bit position */
256 };
257
258 /* Functions that operate on memory bitmaps */
259
260 static void memory_bm_position_reset(struct memory_bitmap *bm)
261 {
262         bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
263         bm->cur.bit = 0;
264 }
265
266 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
267
268 /**
269  *      create_bm_block_list - create a list of block bitmap objects
270  *      @nr_blocks - number of blocks to allocate
271  *      @list - list to put the allocated blocks into
272  *      @ca - chain allocator to be used for allocating memory
273  */
274 static int create_bm_block_list(unsigned long pages,
275                                 struct list_head *list,
276                                 struct chain_allocator *ca)
277 {
278         unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
279
280         while (nr_blocks-- > 0) {
281                 struct bm_block *bb;
282
283                 bb = chain_alloc(ca, sizeof(struct bm_block));
284                 if (!bb)
285                         return -ENOMEM;
286                 list_add(&bb->hook, list);
287         }
288
289         return 0;
290 }
291
292 struct mem_extent {
293         struct list_head hook;
294         unsigned long start;
295         unsigned long end;
296 };
297
298 /**
299  *      free_mem_extents - free a list of memory extents
300  *      @list - list of extents to empty
301  */
302 static void free_mem_extents(struct list_head *list)
303 {
304         struct mem_extent *ext, *aux;
305
306         list_for_each_entry_safe(ext, aux, list, hook) {
307                 list_del(&ext->hook);
308                 kfree(ext);
309         }
310 }
311
312 /**
313  *      create_mem_extents - create a list of memory extents representing
314  *                           contiguous ranges of PFNs
315  *      @list - list to put the extents into
316  *      @gfp_mask - mask to use for memory allocations
317  */
318 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
319 {
320         struct zone *zone;
321
322         INIT_LIST_HEAD(list);
323
324         for_each_zone(zone) {
325                 unsigned long zone_start, zone_end;
326                 struct mem_extent *ext, *cur, *aux;
327
328                 if (!populated_zone(zone))
329                         continue;
330
331                 zone_start = zone->zone_start_pfn;
332                 zone_end = zone->zone_start_pfn + zone->spanned_pages;
333
334                 list_for_each_entry(ext, list, hook)
335                         if (zone_start <= ext->end)
336                                 break;
337
338                 if (&ext->hook == list || zone_end < ext->start) {
339                         /* New extent is necessary */
340                         struct mem_extent *new_ext;
341
342                         new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
343                         if (!new_ext) {
344                                 free_mem_extents(list);
345                                 return -ENOMEM;
346                         }
347                         new_ext->start = zone_start;
348                         new_ext->end = zone_end;
349                         list_add_tail(&new_ext->hook, &ext->hook);
350                         continue;
351                 }
352
353                 /* Merge this zone's range of PFNs with the existing one */
354                 if (zone_start < ext->start)
355                         ext->start = zone_start;
356                 if (zone_end > ext->end)
357                         ext->end = zone_end;
358
359                 /* More merging may be possible */
360                 cur = ext;
361                 list_for_each_entry_safe_continue(cur, aux, list, hook) {
362                         if (zone_end < cur->start)
363                                 break;
364                         if (zone_end < cur->end)
365                                 ext->end = cur->end;
366                         list_del(&cur->hook);
367                         kfree(cur);
368                 }
369         }
370
371         return 0;
372 }
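
/*
 * Worked example (added for illustration): with populated zones spanning
 * PFNs [0, 4096), [4096, 229376) and [1048576, 2097152), the first two
 * ranges touch, so they are merged and the resulting extent list is
 * [0, 229376), [1048576, 2097152).
 */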
373
374 /**
375   *     memory_bm_create - allocate memory for a memory bitmap
376   */
377 static int
378 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
379 {
380         struct chain_allocator ca;
381         struct list_head mem_extents;
382         struct mem_extent *ext;
383         int error;
384
385         chain_init(&ca, gfp_mask, safe_needed);
386         INIT_LIST_HEAD(&bm->blocks);
387
388         error = create_mem_extents(&mem_extents, gfp_mask);
389         if (error)
390                 return error;
391
392         list_for_each_entry(ext, &mem_extents, hook) {
393                 struct bm_block *bb;
394                 unsigned long pfn = ext->start;
395                 unsigned long pages = ext->end - ext->start;
396
397                 bb = list_entry(bm->blocks.prev, struct bm_block, hook);
398
399                 error = create_bm_block_list(pages, bm->blocks.prev, &ca);
400                 if (error)
401                         goto Error;
402
403                 list_for_each_entry_continue(bb, &bm->blocks, hook) {
404                         bb->data = get_image_page(gfp_mask, safe_needed);
405                         if (!bb->data) {
406                                 error = -ENOMEM;
407                                 goto Error;
408                         }
409
410                         bb->start_pfn = pfn;
411                         if (pages >= BM_BITS_PER_BLOCK) {
412                                 pfn += BM_BITS_PER_BLOCK;
413                                 pages -= BM_BITS_PER_BLOCK;
414                         } else {
415                                 /* This is executed only once in the loop */
416                                 pfn += pages;
417                         }
418                         bb->end_pfn = pfn;
419                 }
420         }
421
422         bm->p_list = ca.chain;
423         memory_bm_position_reset(bm);
424  Exit:
425         free_mem_extents(&mem_extents);
426         return error;
427
428  Error:
429         bm->p_list = ca.chain;
430         memory_bm_free(bm, PG_UNSAFE_CLEAR);
431         goto Exit;
432 }
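
/*
 * Illustrative sketch (not part of the original file): the usual life
 * cycle of a memory bitmap as used elsewhere in this file.  The function
 * name is hypothetical.
 */
static int __maybe_unused memory_bm_lifecycle_example(void)
{
	struct memory_bitmap bm;
	int error;

	error = memory_bm_create(&bm, GFP_KERNEL, PG_ANY);
	if (error)
		return error;

	/* ... set, clear and test bits for pfns of interest here ... */

	memory_bm_free(&bm, PG_UNSAFE_CLEAR);
	return 0;
}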
433
434 /**
435   *     memory_bm_free - free memory occupied by the memory bitmap @bm
436   */
437 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
438 {
439         struct bm_block *bb;
440
441         list_for_each_entry(bb, &bm->blocks, hook)
442                 if (bb->data)
443                         free_image_page(bb->data, clear_nosave_free);
444
445         free_list_of_pages(bm->p_list, clear_nosave_free);
446
447         INIT_LIST_HEAD(&bm->blocks);
448 }
449
450 /**
451  *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
452  *      to given pfn.  The cur.block member of @bm is updated so that
453  *      subsequent lookups of neighbouring pfns start from this block.
454  */
455 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
456                                 void **addr, unsigned int *bit_nr)
457 {
458         struct bm_block *bb;
459
460         /*
461          * Check if the pfn corresponds to the current bitmap block and find
462          * the block where it fits if this is not the case.
463          */
464         bb = bm->cur.block;
465         if (pfn < bb->start_pfn)
466                 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
467                         if (pfn >= bb->start_pfn)
468                                 break;
469
470         if (pfn >= bb->end_pfn)
471                 list_for_each_entry_continue(bb, &bm->blocks, hook)
472                         if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
473                                 break;
474
475         if (&bb->hook == &bm->blocks)
476                 return -EFAULT;
477
478         /* The block has been found */
479         bm->cur.block = bb;
480         pfn -= bb->start_pfn;
481         bm->cur.bit = pfn + 1;
482         *bit_nr = pfn;
483         *addr = bb->data;
484         return 0;
485 }
486
487 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
488 {
489         void *addr;
490         unsigned int bit;
491         int error;
492
493         error = memory_bm_find_bit(bm, pfn, &addr, &bit);
494         BUG_ON(error);
495         set_bit(bit, addr);
496 }
497
498 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
499 {
500         void *addr;
501         unsigned int bit;
502         int error;
503
504         error = memory_bm_find_bit(bm, pfn, &addr, &bit);
505         if (!error)
506                 set_bit(bit, addr);
507         return error;
508 }
509
510 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
511 {
512         void *addr;
513         unsigned int bit;
514         int error;
515
516         error = memory_bm_find_bit(bm, pfn, &addr, &bit);
517         BUG_ON(error);
518         clear_bit(bit, addr);
519 }
520
521 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
522 {
523         void *addr;
524         unsigned int bit;
525         int error;
526
527         error = memory_bm_find_bit(bm, pfn, &addr, &bit);
528         BUG_ON(error);
529         return test_bit(bit, addr);
530 }
531
532 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
533 {
534         void *addr;
535         unsigned int bit;
536
537         return !memory_bm_find_bit(bm, pfn, &addr, &bit);
538 }
539
540 /**
541  *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
542  *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
543  *      returned.
544  *
545  *      It is required to run memory_bm_position_reset() before the first call to
546  *      this function.
547  */
548
549 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
550 {
551         struct bm_block *bb;
552         int bit;
553
554         bb = bm->cur.block;
555         do {
556                 bit = bm->cur.bit;
557                 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
558                 if (bit < bm_block_bits(bb))
559                         goto Return_pfn;
560
561                 bb = list_entry(bb->hook.next, struct bm_block, hook);
562                 bm->cur.block = bb;
563                 bm->cur.bit = 0;
564         } while (&bb->hook != &bm->blocks);
565
566         memory_bm_position_reset(bm);
567         return BM_END_OF_MAP;
568
569  Return_pfn:
570         bm->cur.bit = bit + 1;
571         return bb->start_pfn + bit;
572 }
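
/*
 * Illustrative sketch (not part of the original file): walking every set
 * bit of a bitmap.  As required by the comment above, the position is
 * reset before the first memory_bm_next_pfn() call.  The function name is
 * hypothetical; duplicate_memory_bitmap() further down follows the same
 * pattern.
 */
static void __maybe_unused memory_bm_for_each_example(struct memory_bitmap *bm)
{
	unsigned long pfn;

	memory_bm_position_reset(bm);
	for (;;) {
		pfn = memory_bm_next_pfn(bm);
		if (pfn == BM_END_OF_MAP)
			break;
		pr_debug("PM: pfn %lu is set\n", pfn);
	}
}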
573
574 /**
575  *      This structure represents a range of page frames the contents of which
576  *      should not be saved during the suspend.
577  */
578
579 struct nosave_region {
580         struct list_head list;
581         unsigned long start_pfn;
582         unsigned long end_pfn;
583 };
584
585 static LIST_HEAD(nosave_regions);
586
587 /**
588  *      register_nosave_region - register a range of page frames the contents
589  *      of which should not be saved during the suspend (to be used in the early
590  *      initialization code)
591  */
592
593 void __init
594 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
595                          int use_kmalloc)
596 {
597         struct nosave_region *region;
598
599         if (start_pfn >= end_pfn)
600                 return;
601
602         if (!list_empty(&nosave_regions)) {
603                 /* Try to extend the previous region (they should be sorted) */
604                 region = list_entry(nosave_regions.prev,
605                                         struct nosave_region, list);
606                 if (region->end_pfn == start_pfn) {
607                         region->end_pfn = end_pfn;
608                         goto Report;
609                 }
610         }
611         if (use_kmalloc) {
612                 /* during init, this shouldn't fail */
613                 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
614                 BUG_ON(!region);
615         } else
616                 /* This allocation cannot fail */
617                 region = alloc_bootmem_low(sizeof(struct nosave_region));
618         region->start_pfn = start_pfn;
619         region->end_pfn = end_pfn;
620         list_add_tail(&region->list, &nosave_regions);
621  Report:
622         printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
623                 start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
624 }
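
/*
 * Illustrative sketch (not part of the original file): early platform code
 * excludes firmware or MMIO ranges from the image by registering them as
 * nosave regions, normally through the register_nosave_region() wrapper
 * declared in <linux/suspend.h>.  The physical range below is hypothetical.
 */
static void __init __maybe_unused nosave_region_example(void)
{
	/* Do not save the contents of 0x000a0000 - 0x00100000. */
	__register_nosave_region(0x000a0000UL >> PAGE_SHIFT,
				 0x00100000UL >> PAGE_SHIFT, 0);
}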
625
626 /*
627  * Set bits in this map correspond to the page frames the contents of which
628  * should not be saved during the suspend.
629  */
630 static struct memory_bitmap *forbidden_pages_map;
631
632 /* Set bits in this map correspond to free page frames. */
633 static struct memory_bitmap *free_pages_map;
634
635 /*
636  * Each page frame allocated for creating the image is marked by setting the
637  * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
638  */
639
640 void swsusp_set_page_free(struct page *page)
641 {
642         if (free_pages_map)
643                 memory_bm_set_bit(free_pages_map, page_to_pfn(page));
644 }
645
646 static int swsusp_page_is_free(struct page *page)
647 {
648         return free_pages_map ?
649                 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
650 }
651
652 void swsusp_unset_page_free(struct page *page)
653 {
654         if (free_pages_map)
655                 memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
656 }
657
658 static void swsusp_set_page_forbidden(struct page *page)
659 {
660         if (forbidden_pages_map)
661                 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
662 }
663
664 int swsusp_page_is_forbidden(struct page *page)
665 {
666         return forbidden_pages_map ?
667                 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
668 }
669
670 static void swsusp_unset_page_forbidden(struct page *page)
671 {
672         if (forbidden_pages_map)
673                 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
674 }
675
676 /**
677  *      mark_nosave_pages - set bits corresponding to the page frames the
678  *      contents of which should not be saved in a given bitmap.
679  */
680
681 static void mark_nosave_pages(struct memory_bitmap *bm)
682 {
683         struct nosave_region *region;
684
685         if (list_empty(&nosave_regions))
686                 return;
687
688         list_for_each_entry(region, &nosave_regions, list) {
689                 unsigned long pfn;
690
691                 pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
692                                 region->start_pfn << PAGE_SHIFT,
693                                 region->end_pfn << PAGE_SHIFT);
694
695                 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
696                         if (pfn_valid(pfn)) {
697                                 /*
698                                  * It is safe to ignore the result of
699                                  * mem_bm_set_bit_check() here, since we won't
700                                  * touch the PFNs for which the error is
701                                  * returned anyway.
702                                  */
703                                 mem_bm_set_bit_check(bm, pfn);
704                         }
705         }
706 }
707
708 /**
709  *      create_basic_memory_bitmaps - create bitmaps needed for marking page
710  *      frames that should not be saved and free page frames.  The pointers
711  *      forbidden_pages_map and free_pages_map are only modified if everything
712  *      goes well, because we don't want the bits to be used before both bitmaps
713  *      are set up.
714  */
715
716 int create_basic_memory_bitmaps(void)
717 {
718         struct memory_bitmap *bm1, *bm2;
719         int error = 0;
720
721         BUG_ON(forbidden_pages_map || free_pages_map);
722
723         bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
724         if (!bm1)
725                 return -ENOMEM;
726
727         error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
728         if (error)
729                 goto Free_first_object;
730
731         bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
732         if (!bm2)
733                 goto Free_first_bitmap;
734
735         error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
736         if (error)
737                 goto Free_second_object;
738
739         forbidden_pages_map = bm1;
740         free_pages_map = bm2;
741         mark_nosave_pages(forbidden_pages_map);
742
743         pr_debug("PM: Basic memory bitmaps created\n");
744
745         return 0;
746
747  Free_second_object:
748         kfree(bm2);
749  Free_first_bitmap:
750         memory_bm_free(bm1, PG_UNSAFE_CLEAR);
751  Free_first_object:
752         kfree(bm1);
753         return -ENOMEM;
754 }
755
756 /**
757  *      free_basic_memory_bitmaps - free memory bitmaps allocated by
758  *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
759  *      so that the bitmaps themselves are not referred to while they are being
760  *      freed.
761  */
762
763 void free_basic_memory_bitmaps(void)
764 {
765         struct memory_bitmap *bm1, *bm2;
766
767         BUG_ON(!(forbidden_pages_map && free_pages_map));
768
769         bm1 = forbidden_pages_map;
770         bm2 = free_pages_map;
771         forbidden_pages_map = NULL;
772         free_pages_map = NULL;
773         memory_bm_free(bm1, PG_UNSAFE_CLEAR);
774         kfree(bm1);
775         memory_bm_free(bm2, PG_UNSAFE_CLEAR);
776         kfree(bm2);
777
778         pr_debug("PM: Basic memory bitmaps freed\n");
779 }
780
781 /**
782  *      snapshot_additional_pages - estimate the number of additional pages
783  *      that will be needed for setting up the suspend image data structures
784  *      for a given zone (usually the result is greater than the exact number)
785  */
786
787 unsigned int snapshot_additional_pages(struct zone *zone)
788 {
789         unsigned int res;
790
791         res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
792         res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
793         return 2 * res;
794 }
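
/*
 * Worked example (added for illustration): with 4 KiB pages a bitmap block
 * covers BM_BITS_PER_BLOCK = 4096 * 8 = 32768 page frames.  For a zone
 * spanning 262144 page frames (1 GiB) this gives
 * DIV_ROUND_UP(262144, 32768) = 8 bitmap data pages, plus one more page
 * for the eight struct bm_block objects, i.e. 9 pages per bitmap and
 * 2 * 9 = 18 pages returned for the two basic bitmaps.
 */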
795
796 #ifdef CONFIG_HIGHMEM
797 /**
798  *      count_free_highmem_pages - compute the total number of free highmem
799  *      pages, system-wide.
800  */
801
802 static unsigned int count_free_highmem_pages(void)
803 {
804         struct zone *zone;
805         unsigned int cnt = 0;
806
807         for_each_zone(zone)
808                 if (populated_zone(zone) && is_highmem(zone))
809                         cnt += zone_page_state(zone, NR_FREE_PAGES);
810
811         return cnt;
812 }
813
814 /**
815  *      saveable_highmem_page - Determine whether a highmem page should be
816  *      included in the suspend image.
817  *
818  *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
819  *      and it isn't a part of a free chunk of pages.
820  */
821 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
822 {
823         struct page *page;
824
825         if (!pfn_valid(pfn))
826                 return NULL;
827
828         page = pfn_to_page(pfn);
829         if (page_zone(page) != zone)
830                 return NULL;
831
832         BUG_ON(!PageHighMem(page));
833
834         if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
835             PageReserved(page))
836                 return NULL;
837
838         return page;
839 }
840
841 /**
842  *      count_highmem_pages - compute the total number of saveable highmem
843  *      pages.
844  */
845
846 unsigned int count_highmem_pages(void)
847 {
848         struct zone *zone;
849         unsigned int n = 0;
850
851         for_each_zone(zone) {
852                 unsigned long pfn, max_zone_pfn;
853
854                 if (!is_highmem(zone))
855                         continue;
856
857                 mark_free_pages(zone);
858                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
859                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
860                         if (saveable_highmem_page(zone, pfn))
861                                 n++;
862         }
863         return n;
864 }
865 #else
866 static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
867 {
868         return NULL;
869 }
870 #endif /* CONFIG_HIGHMEM */
871
872 /**
873  *      saveable_page - Determine whether a non-highmem page should be included
874  *      in the suspend image.
875  *
876  *      We should save the page if it isn't Nosave, and is not in the range
877  *      of pages statically defined as 'unsaveable', and it isn't a part of
878  *      a free chunk of pages.
879  */
880 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
881 {
882         struct page *page;
883
884         if (!pfn_valid(pfn))
885                 return NULL;
886
887         page = pfn_to_page(pfn);
888         if (page_zone(page) != zone)
889                 return NULL;
890
891         BUG_ON(PageHighMem(page));
892
893         if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
894                 return NULL;
895
896         if (PageReserved(page)
897             && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
898                 return NULL;
899
900         return page;
901 }
902
903 /**
904  *      count_data_pages - compute the total number of saveable non-highmem
905  *      pages.
906  */
907
908 unsigned int count_data_pages(void)
909 {
910         struct zone *zone;
911         unsigned long pfn, max_zone_pfn;
912         unsigned int n = 0;
913
914         for_each_zone(zone) {
915                 if (is_highmem(zone))
916                         continue;
917
918                 mark_free_pages(zone);
919                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
920                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
921                         if (saveable_page(zone, pfn))
922                                 n++;
923         }
924         return n;
925 }
926
927 /* This is needed, because copy_page and memcpy are not usable for copying
928  * task structs.
929  */
930 static inline void do_copy_page(long *dst, long *src)
931 {
932         int n;
933
934         for (n = PAGE_SIZE / sizeof(long); n; n--)
935                 *dst++ = *src++;
936 }
937
938
939 /**
940  *      safe_copy_page - check if the page we are going to copy is marked as
941  *              present in the kernel page tables (this always is the case if
942  *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
943  *              kernel_page_present() always returns 'true').
944  */
945 static void safe_copy_page(void *dst, struct page *s_page)
946 {
947         if (kernel_page_present(s_page)) {
948                 do_copy_page(dst, page_address(s_page));
949         } else {
950                 kernel_map_pages(s_page, 1, 1);
951                 do_copy_page(dst, page_address(s_page));
952                 kernel_map_pages(s_page, 1, 0);
953         }
954 }
955
956
957 #ifdef CONFIG_HIGHMEM
958 static inline struct page *
959 page_is_saveable(struct zone *zone, unsigned long pfn)
960 {
961         return is_highmem(zone) ?
962                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
963 }
964
965 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
966 {
967         struct page *s_page, *d_page;
968         void *src, *dst;
969
970         s_page = pfn_to_page(src_pfn);
971         d_page = pfn_to_page(dst_pfn);
972         if (PageHighMem(s_page)) {
973                 src = kmap_atomic(s_page, KM_USER0);
974                 dst = kmap_atomic(d_page, KM_USER1);
975                 do_copy_page(dst, src);
976                 kunmap_atomic(src, KM_USER0);
977                 kunmap_atomic(dst, KM_USER1);
978         } else {
979                 if (PageHighMem(d_page)) {
980                         /* Page pointed to by src may contain some kernel
981                          * data modified by kmap_atomic()
982                          */
983                         safe_copy_page(buffer, s_page);
984                         dst = kmap_atomic(d_page, KM_USER0);
985                         memcpy(dst, buffer, PAGE_SIZE);
986                         kunmap_atomic(dst, KM_USER0);
987                 } else {
988                         safe_copy_page(page_address(d_page), s_page);
989                 }
990         }
991 }
992 #else
993 #define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)
994
995 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
996 {
997         safe_copy_page(page_address(pfn_to_page(dst_pfn)),
998                                 pfn_to_page(src_pfn));
999 }
1000 #endif /* CONFIG_HIGHMEM */
1001
1002 static void
1003 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1004 {
1005         struct zone *zone;
1006         unsigned long pfn;
1007
1008         for_each_zone(zone) {
1009                 unsigned long max_zone_pfn;
1010
1011                 mark_free_pages(zone);
1012                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1013                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1014                         if (page_is_saveable(zone, pfn))
1015                                 memory_bm_set_bit(orig_bm, pfn);
1016         }
1017         memory_bm_position_reset(orig_bm);
1018         memory_bm_position_reset(copy_bm);
1019         for(;;) {
1020                 pfn = memory_bm_next_pfn(orig_bm);
1021                 if (unlikely(pfn == BM_END_OF_MAP))
1022                         break;
1023                 copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1024         }
1025 }
1026
1027 /* Total number of image pages */
1028 static unsigned int nr_copy_pages;
1029 /* Number of pages needed for saving the original pfns of the image pages */
1030 static unsigned int nr_meta_pages;
1031
1032 /**
1033  *      swsusp_free - free pages allocated for the suspend.
1034  *
1035  *      Suspend pages are allocated before the atomic copy is made, so we
1036  *      need to release them after the resume.
1037  */
1038
1039 void swsusp_free(void)
1040 {
1041         struct zone *zone;
1042         unsigned long pfn, max_zone_pfn;
1043
1044         for_each_zone(zone) {
1045                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1046                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1047                         if (pfn_valid(pfn)) {
1048                                 struct page *page = pfn_to_page(pfn);
1049
1050                                 if (swsusp_page_is_forbidden(page) &&
1051                                     swsusp_page_is_free(page)) {
1052                                         swsusp_unset_page_forbidden(page);
1053                                         swsusp_unset_page_free(page);
1054                                         __free_page(page);
1055                                 }
1056                         }
1057         }
1058         nr_copy_pages = 0;
1059         nr_meta_pages = 0;
1060         restore_pblist = NULL;
1061         buffer = NULL;
1062 }
1063
1064 #ifdef CONFIG_HIGHMEM
1065 /**
1066   *     count_pages_for_highmem - compute the number of non-highmem pages
1067   *     that will be necessary for creating copies of highmem pages.
1068   */
1069
1070 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1071 {
1072         unsigned int free_highmem = count_free_highmem_pages();
1073
1074         if (free_highmem >= nr_highmem)
1075                 nr_highmem = 0;
1076         else
1077                 nr_highmem -= free_highmem;
1078
1079         return nr_highmem;
1080 }
1081 #else
1082 static unsigned int
1083 count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1084 #endif /* CONFIG_HIGHMEM */
1085
1086 /**
1087  *      enough_free_mem - Make sure we have enough free memory for the
1088  *      snapshot image.
1089  */
1090
1091 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1092 {
1093         struct zone *zone;
1094         unsigned int free = 0, meta = 0;
1095
1096         for_each_zone(zone) {
1097                 meta += snapshot_additional_pages(zone);
1098                 if (!is_highmem(zone))
1099                         free += zone_page_state(zone, NR_FREE_PAGES);
1100         }
1101
1102         nr_pages += count_pages_for_highmem(nr_highmem);
1103         pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
1104                 nr_pages, PAGES_FOR_IO, meta, free);
1105
1106         return free > nr_pages + PAGES_FOR_IO + meta;
1107 }
1108
1109 #ifdef CONFIG_HIGHMEM
1110 /**
1111  *      get_highmem_buffer - if there are some highmem pages in the suspend
1112  *      image, we may need the buffer to copy them and/or load their data.
1113  */
1114
1115 static inline int get_highmem_buffer(int safe_needed)
1116 {
1117         buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1118         return buffer ? 0 : -ENOMEM;
1119 }
1120
1121 /**
1122  *      alloc_highmem_image_pages - allocate some highmem pages for the image.
1123  *      Try to allocate as many pages as needed, but if the number of free
1124  *      highmem pages is less than that, allocate them all.
1125  */
1126
1127 static inline unsigned int
1128 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1129 {
1130         unsigned int to_alloc = count_free_highmem_pages();
1131
1132         if (to_alloc > nr_highmem)
1133                 to_alloc = nr_highmem;
1134
1135         nr_highmem -= to_alloc;
1136         while (to_alloc-- > 0) {
1137                 struct page *page;
1138
1139                 page = alloc_image_page(__GFP_HIGHMEM);
1140                 memory_bm_set_bit(bm, page_to_pfn(page));
1141         }
1142         return nr_highmem;
1143 }
1144 #else
1145 static inline int get_highmem_buffer(int safe_needed) { return 0; }
1146
1147 static inline unsigned int
1148 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1149 #endif /* CONFIG_HIGHMEM */
1150
1151 /**
1152  *      swsusp_alloc - allocate memory for the suspend image
1153  *
1154  *      We first try to allocate as many highmem pages as there are
1155  *      saveable highmem pages in the system.  If that fails, we allocate
1156  *      non-highmem pages for the copies of the remaining highmem ones.
1157  *
1158  *      In this approach it is likely that the copies of highmem pages will
1159  *      also be located in the high memory, because of the way in which
1160  *      copy_data_pages() works.
1161  */
1162
1163 static int
1164 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1165                 unsigned int nr_pages, unsigned int nr_highmem)
1166 {
1167         int error;
1168
1169         error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1170         if (error)
1171                 goto Free;
1172
1173         error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1174         if (error)
1175                 goto Free;
1176
1177         if (nr_highmem > 0) {
1178                 error = get_highmem_buffer(PG_ANY);
1179                 if (error)
1180                         goto Free;
1181
1182                 nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
1183         }
1184         while (nr_pages-- > 0) {
1185                 struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1186
1187                 if (!page)
1188                         goto Free;
1189
1190                 memory_bm_set_bit(copy_bm, page_to_pfn(page));
1191         }
1192         return 0;
1193
1194  Free:
1195         swsusp_free();
1196         return -ENOMEM;
1197 }
1198
1199 /* Memory bitmap used for marking saveable pages (during suspend) or the
1200  * suspend image pages (during resume)
1201  */
1202 static struct memory_bitmap orig_bm;
1203 /* Memory bitmap used on suspend for marking allocated pages that will contain
1204  * the copies of saveable pages.  During resume it is initially used for
1205  * marking the suspend image pages, but then its set bits are duplicated in
1206  * @orig_bm and it is released.  Next, on systems with high memory, it may be
1207  * used for marking "safe" highmem pages, but it has to be reinitialized for
1208  * this purpose.
1209  */
1210 static struct memory_bitmap copy_bm;
1211
1212 asmlinkage int swsusp_save(void)
1213 {
1214         unsigned int nr_pages, nr_highmem;
1215
1216         printk(KERN_INFO "PM: Creating hibernation image: \n");
1217
1218         drain_local_pages(NULL);
1219         nr_pages = count_data_pages();
1220         nr_highmem = count_highmem_pages();
1221         printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1222
1223         if (!enough_free_mem(nr_pages, nr_highmem)) {
1224                 printk(KERN_ERR "PM: Not enough free memory\n");
1225                 return -ENOMEM;
1226         }
1227
1228         if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1229                 printk(KERN_ERR "PM: Memory allocation failed\n");
1230                 return -ENOMEM;
1231         }
1232
1233         /* During allocating of suspend pagedir, new cold pages may appear.
1234          * Kill them.
1235          */
1236         drain_local_pages(NULL);
1237         copy_data_pages(&copy_bm, &orig_bm);
1238
1239         /*
1240          * End of critical section. From now on, we can write to memory,
1241          * but we should not touch disk. This specially means we must _not_
1242          * touch swap space! Except we must write out our image of course.
1243          */
1244
1245         nr_pages += nr_highmem;
1246         nr_copy_pages = nr_pages;
1247         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1248
1249         printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1250                 nr_pages);
1251
1252         return 0;
1253 }
1254
1255 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
1256 static int init_header_complete(struct swsusp_info *info)
1257 {
1258         memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1259         info->version_code = LINUX_VERSION_CODE;
1260         return 0;
1261 }
1262
1263 static char *check_image_kernel(struct swsusp_info *info)
1264 {
1265         if (info->version_code != LINUX_VERSION_CODE)
1266                 return "kernel version";
1267         if (strcmp(info->uts.sysname,init_utsname()->sysname))
1268                 return "system type";
1269         if (strcmp(info->uts.release,init_utsname()->release))
1270                 return "kernel release";
1271         if (strcmp(info->uts.version,init_utsname()->version))
1272                 return "version";
1273         if (strcmp(info->uts.machine,init_utsname()->machine))
1274                 return "machine";
1275         return NULL;
1276 }
1277 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1278
1279 unsigned long snapshot_get_image_size(void)
1280 {
1281         return nr_copy_pages + nr_meta_pages + 1;
1282 }
1283
1284 static int init_header(struct swsusp_info *info)
1285 {
1286         memset(info, 0, sizeof(struct swsusp_info));
1287         info->num_physpages = num_physpages;
1288         info->image_pages = nr_copy_pages;
1289         info->pages = snapshot_get_image_size();
1290         info->size = info->pages;
1291         info->size <<= PAGE_SHIFT;
1292         return init_header_complete(info);
1293 }
1294
1295 /**
1296  *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1297  *      are stored in the array @buf[] (1 page at a time)
1298  */
1299
1300 static inline void
1301 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1302 {
1303         int j;
1304
1305         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1306                 buf[j] = memory_bm_next_pfn(bm);
1307                 if (unlikely(buf[j] == BM_END_OF_MAP))
1308                         break;
1309         }
1310 }
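
/*
 * Worked example (added for illustration): each meta page filled by
 * pack_pfns() holds PAGE_SIZE / sizeof(long) pfns, i.e. 512 entries with
 * 4 KiB pages on a 64-bit kernel (1024 on 32-bit).  This matches the
 * nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE)
 * computation in swsusp_save().
 */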
1311
1312 /**
1313  *      snapshot_read_next - used for reading the system memory snapshot.
1314  *
1315  *      On the first call to it @handle should point to a zeroed
1316  *      snapshot_handle structure.  The structure gets updated and a pointer
1317  *      to it should be passed to this function every next time.
1318  *
1319  *      The @count parameter should contain the number of bytes the caller
1320  *      wants to read from the snapshot.  It must not be zero.
1321  *
1322  *      On success the function returns a positive number.  Then, the caller
1323  *      is allowed to read up to the returned number of bytes from the memory
1324  *      location computed by the data_of() macro.  The number returned
1325  *      may be smaller than @count, but this only happens if the read would
1326  *      cross a page boundary otherwise.
1327  *
1328  *      The function returns 0 to indicate the end of data stream condition,
1329  *      and a negative number is returned on error.  In such cases the
1330  *      structure pointed to by @handle is not updated and should not be used
1331  *      any more.
1332  */
1333
1334 int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1335 {
1336         if (handle->cur > nr_meta_pages + nr_copy_pages)
1337                 return 0;
1338
1339         if (!buffer) {
1340                 /* This makes the buffer be freed by swsusp_free() */
1341                 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1342                 if (!buffer)
1343                         return -ENOMEM;
1344         }
1345         if (!handle->offset) {
1346                 int error;
1347
1348                 error = init_header((struct swsusp_info *)buffer);
1349                 if (error)
1350                         return error;
1351                 handle->buffer = buffer;
1352                 memory_bm_position_reset(&orig_bm);
1353                 memory_bm_position_reset(&copy_bm);
1354         }
1355         if (handle->prev < handle->cur) {
1356                 if (handle->cur <= nr_meta_pages) {
1357                         memset(buffer, 0, PAGE_SIZE);
1358                         pack_pfns(buffer, &orig_bm);
1359                 } else {
1360                         struct page *page;
1361
1362                         page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1363                         if (PageHighMem(page)) {
1364                                 /* Highmem pages are copied to the buffer,
1365                                  * because we can't return with a kmapped
1366                                  * highmem page (we may not be called again).
1367                                  */
1368                                 void *kaddr;
1369
1370                                 kaddr = kmap_atomic(page, KM_USER0);
1371                                 memcpy(buffer, kaddr, PAGE_SIZE);
1372                                 kunmap_atomic(kaddr, KM_USER0);
1373                                 handle->buffer = buffer;
1374                         } else {
1375                                 handle->buffer = page_address(page);
1376                         }
1377                 }
1378                 handle->prev = handle->cur;
1379         }
1380         handle->buf_offset = handle->cur_offset;
1381         if (handle->cur_offset + count >= PAGE_SIZE) {
1382                 count = PAGE_SIZE - handle->cur_offset;
1383                 handle->cur_offset = 0;
1384                 handle->cur++;
1385         } else {
1386                 handle->cur_offset += count;
1387         }
1388         handle->offset += count;
1389         return count;
1390 }
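
/*
 * Illustrative sketch (not part of the original file): the calling
 * convention described above, written as a reader loop similar to what the
 * user-space and swap writers in this directory do.  The transfer callback
 * and function name are hypothetical; data_of() comes from power.h.
 */
static int __maybe_unused snapshot_dump_example(int (*xfer)(void *data, size_t len))
{
	struct snapshot_handle handle;
	int res;

	memset(&handle, 0, sizeof(struct snapshot_handle));
	for (;;) {
		res = snapshot_read_next(&handle, PAGE_SIZE);
		if (res <= 0)
			return res;	/* 0 means end of image, < 0 an error */

		res = xfer(data_of(handle), res);
		if (res)
			return res;
	}
}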
1391
1392 /**
1393  *      mark_unsafe_pages - mark the pages that cannot be used for storing
1394  *      the image during resume, because they conflict with the pages that
1395  *      had been used before suspend
1396  */
1397
1398 static int mark_unsafe_pages(struct memory_bitmap *bm)
1399 {
1400         struct zone *zone;
1401         unsigned long pfn, max_zone_pfn;
1402
1403         /* Clear page flags */
1404         for_each_zone(zone) {
1405                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1406                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1407                         if (pfn_valid(pfn))
1408                                 swsusp_unset_page_free(pfn_to_page(pfn));
1409         }
1410
1411         /* Mark pages that correspond to the "original" pfns as "unsafe" */
1412         memory_bm_position_reset(bm);
1413         do {
1414                 pfn = memory_bm_next_pfn(bm);
1415                 if (likely(pfn != BM_END_OF_MAP)) {
1416                         if (likely(pfn_valid(pfn)))
1417                                 swsusp_set_page_free(pfn_to_page(pfn));
1418                         else
1419                                 return -EFAULT;
1420                 }
1421         } while (pfn != BM_END_OF_MAP);
1422
1423         allocated_unsafe_pages = 0;
1424
1425         return 0;
1426 }
1427
1428 static void
1429 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1430 {
1431         unsigned long pfn;
1432
1433         memory_bm_position_reset(src);
1434         pfn = memory_bm_next_pfn(src);
1435         while (pfn != BM_END_OF_MAP) {
1436                 memory_bm_set_bit(dst, pfn);
1437                 pfn = memory_bm_next_pfn(src);
1438         }
1439 }
1440
1441 static int check_header(struct swsusp_info *info)
1442 {
1443         char *reason;
1444
1445         reason = check_image_kernel(info);
1446         if (!reason && info->num_physpages != num_physpages)
1447                 reason = "memory size";
1448         if (reason) {
1449                 printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1450                 return -EPERM;
1451         }
1452         return 0;
1453 }
1454
1455 /**
1456  *      load_header - check the image header and copy data from it
1457  */
1458
1459 static int
1460 load_header(struct swsusp_info *info)
1461 {
1462         int error;
1463
1464         restore_pblist = NULL;
1465         error = check_header(info);
1466         if (!error) {
1467                 nr_copy_pages = info->image_pages;
1468                 nr_meta_pages = info->pages - info->image_pages - 1;
1469         }
1470         return error;
1471 }
1472
1473 /**
1474  *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1475  *      the corresponding bit in the memory bitmap @bm
1476  */
1477 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1478 {
1479         int j;
1480
1481         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1482                 if (unlikely(buf[j] == BM_END_OF_MAP))
1483                         break;
1484
1485                 if (memory_bm_pfn_present(bm, buf[j]))
1486                         memory_bm_set_bit(bm, buf[j]);
1487                 else
1488                         return -EFAULT;
1489         }
1490
1491         return 0;
1492 }
1493
1494 /* List of "safe" pages that may be used to store data loaded from the suspend
1495  * image
1496  */
1497 static struct linked_page *safe_pages_list;
1498
1499 #ifdef CONFIG_HIGHMEM
1500 /* struct highmem_pbe is used for creating the list of highmem pages that
1501  * should be restored atomically during the resume from disk, because the page
1502  * frames they have occupied before the suspend are in use.
1503  */
1504 struct highmem_pbe {
1505         struct page *copy_page; /* data is here now */
1506         struct page *orig_page; /* data was here before the suspend */
1507         struct highmem_pbe *next;
1508 };
1509
1510 /* List of highmem PBEs needed for restoring the highmem pages that were
1511  * allocated before the suspend and included in the suspend image, but have
1512  * also been allocated by the "resume" kernel, so their contents cannot be
1513  * written directly to their "original" page frames.
1514  */
1515 static struct highmem_pbe *highmem_pblist;
1516
1517 /**
1518  *      count_highmem_image_pages - compute the number of highmem pages in the
1519  *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1520  *      image pages are assumed to be set.
1521  */
1522
1523 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1524 {
1525         unsigned long pfn;
1526         unsigned int cnt = 0;
1527
1528         memory_bm_position_reset(bm);
1529         pfn = memory_bm_next_pfn(bm);
1530         while (pfn != BM_END_OF_MAP) {
1531                 if (PageHighMem(pfn_to_page(pfn)))
1532                         cnt++;
1533
1534                 pfn = memory_bm_next_pfn(bm);
1535         }
1536         return cnt;
1537 }
1538
1539 /**
1540  *      prepare_highmem_image - try to allocate as many highmem pages as
1541  *      there are highmem image pages (@nr_highmem_p points to the variable
1542  *      containing the number of highmem image pages).  The pages that are
1543  *      "safe" (ie. will not be overwritten when the suspend image is
1544  *      restored) have the corresponding bits set in @bm (it must be
1545  *      unitialized).
1546  *      uninitialized).
1547  *      NOTE: This function should not be called if there are no highmem
1548  *      image pages.
1549  */
1550
1551 static unsigned int safe_highmem_pages;
1552
1553 static struct memory_bitmap *safe_highmem_bm;
1554
1555 static int
1556 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1557 {
1558         unsigned int to_alloc;
1559
1560         if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1561                 return -ENOMEM;
1562
1563         if (get_highmem_buffer(PG_SAFE))
1564                 return -ENOMEM;
1565
1566         to_alloc = count_free_highmem_pages();
1567         if (to_alloc > *nr_highmem_p)
1568                 to_alloc = *nr_highmem_p;
1569         else
1570                 *nr_highmem_p = to_alloc;
1571
1572         safe_highmem_pages = 0;
1573         while (to_alloc-- > 0) {
1574                 struct page *page;
1575
1576                 page = alloc_page(__GFP_HIGHMEM);
1577                 if (!swsusp_page_is_free(page)) {
1578                         /* The page is "safe", set its bit in the bitmap */
1579                         memory_bm_set_bit(bm, page_to_pfn(page));
1580                         safe_highmem_pages++;
1581                 }
1582                 /* Mark the page as allocated */
1583                 swsusp_set_page_forbidden(page);
1584                 swsusp_set_page_free(page);
1585         }
1586         memory_bm_position_reset(bm);
1587         safe_highmem_bm = bm;
1588         return 0;
1589 }
1590
1591 /**
1592  *      get_highmem_page_buffer - for given highmem image page find the buffer
1593  *      that suspend_write_next() should set for its caller to write to.
1594  *
1595  *      If the page is to be saved to its "original" page frame or a copy of
1596  *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1597  *      the copy of the page is to be made in normal memory, so the address of
1598  *      the copy is returned.
1599  *
1600  *      If @buffer is returned, the caller of snapshot_write_next() will write
1601  *      the page's contents to @buffer, so they will have to be copied to the
1602  *      right location on the next call to snapshot_write_next(), which is done
1603  *      with the help of copy_last_highmem_page().  For this purpose, if
1604  *      @buffer is returned, @last_highmem_page is set to the page to which
1605  *      the data will have to be copied from @buffer.
1606  */
1607
1608 static struct page *last_highmem_page;
1609
1610 static void *
1611 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1612 {
1613         struct highmem_pbe *pbe;
1614         void *kaddr;
1615
1616         if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1617                 /* We have allocated the "original" page frame and we can
1618                  * use it directly to store the loaded page.
1619                  */
1620                 last_highmem_page = page;
1621                 return buffer;
1622         }
1623         /* The "original" page frame has not been allocated and we have to
1624          * use a "safe" page frame to store the loaded page.
1625          */
1626         pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1627         if (!pbe) {
1628                 swsusp_free();
1629                 return ERR_PTR(-ENOMEM);
1630         }
1631         pbe->orig_page = page;
1632         if (safe_highmem_pages > 0) {
1633                 struct page *tmp;
1634
1635                 /* Copy of the page will be stored in high memory */
1636                 kaddr = buffer;
1637                 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1638                 safe_highmem_pages--;
1639                 last_highmem_page = tmp;
1640                 pbe->copy_page = tmp;
1641         } else {
1642                 /* Copy of the page will be stored in normal memory */
1643                 kaddr = safe_pages_list;
1644                 safe_pages_list = safe_pages_list->next;
1645                 pbe->copy_page = virt_to_page(kaddr);
1646         }
1647         pbe->next = highmem_pblist;
1648         highmem_pblist = pbe;
1649         return kaddr;
1650 }
1651
1652 /**
1653  *      copy_last_highmem_page - copy the contents of a highmem image page from
1654  *      @buffer, where the caller of snapshot_write_next() has placed them,
1655  *      to the right location represented by @last_highmem_page.
1656  */
1657
1658 static void copy_last_highmem_page(void)
1659 {
1660         if (last_highmem_page) {
1661                 void *dst;
1662
1663                 dst = kmap_atomic(last_highmem_page, KM_USER0);
1664                 memcpy(dst, buffer, PAGE_SIZE);
1665                 kunmap_atomic(dst, KM_USER0);
1666                 last_highmem_page = NULL;
1667         }
1668 }
1669
1670 static inline int last_highmem_page_copied(void)
1671 {
1672         return !last_highmem_page;
1673 }
1674
1675 static inline void free_highmem_data(void)
1676 {
1677         if (safe_highmem_bm)
1678                 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1679
1680         if (buffer)
1681                 free_image_page(buffer, PG_UNSAFE_CLEAR);
1682 }
1683 #else
1684 static inline int get_safe_write_buffer(void) { return 0; }
1685
1686 static unsigned int
1687 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1688
1689 static inline int
1690 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1691 {
1692         return 0;
1693 }
1694
1695 static inline void *
1696 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1697 {
1698         return ERR_PTR(-EINVAL);
1699 }
1700
1701 static inline void copy_last_highmem_page(void) {}
1702 static inline int last_highmem_page_copied(void) { return 1; }
1703 static inline void free_highmem_data(void) {}
1704 #endif /* CONFIG_HIGHMEM */
1705
1706 /**
1707  *      prepare_image - use the memory bitmap @bm to mark the pages that will
1708  *      be overwritten in the process of restoring the system memory state
1709  *      from the suspend image ("unsafe" pages) and allocate memory for the
1710  *      image.
1711  *
1712  *      The idea is to allocate a new memory bitmap first and then allocate
1713  *      as many pages as needed for the image data, but not to assign these
1714  *      pages to specific tasks initially.  Instead, we just mark them as
1715  *      allocated and create a list of "safe" pages that will be used
1716  *      later.  On systems with high memory a list of "safe" highmem pages is
1717  *      also created.
1718  */
1719
1720 #define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
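/* Rough illustration only: assuming 4 KiB pages and a three-pointer
 * struct pbe on a 64-bit build, this works out to (4096 - 8) / 24,
 * i.e. about 170 PBEs per linked page; the exact figure is
 * architecture-dependent.
 */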
1721
1722 static int
1723 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1724 {
1725         unsigned int nr_pages, nr_highmem;
1726         struct linked_page *sp_list, *lp;
1727         int error;
1728
1729         /* If there is no highmem, the buffer will not be necessary */
1730         free_image_page(buffer, PG_UNSAFE_CLEAR);
1731         buffer = NULL;
1732
1733         nr_highmem = count_highmem_image_pages(bm);
1734         error = mark_unsafe_pages(bm);
1735         if (error)
1736                 goto Free;
1737
1738         error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
1739         if (error)
1740                 goto Free;
1741
1742         duplicate_memory_bitmap(new_bm, bm);
1743         memory_bm_free(bm, PG_UNSAFE_KEEP);
1744         if (nr_highmem > 0) {
1745                 error = prepare_highmem_image(bm, &nr_highmem);
1746                 if (error)
1747                         goto Free;
1748         }
1749         /* Reserve some safe pages for potential later use.
1750          *
1751          * NOTE: This way we make sure there will be enough safe pages for the
1752          * chain_alloc() in get_buffer().  It is a bit wasteful, but
1753          * nr_copy_pages cannot be greater than 50% of the memory anyway.
1754          */
1755         sp_list = NULL;
1756         /* nr_copy_pages cannot be less than allocated_unsafe_pages */
1757         nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1758         nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1759         while (nr_pages > 0) {
1760                 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1761                 if (!lp) {
1762                         error = -ENOMEM;
1763                         goto Free;
1764                 }
1765                 lp->next = sp_list;
1766                 sp_list = lp;
1767                 nr_pages--;
1768         }
1769         /* Preallocate memory for the image */
1770         safe_pages_list = NULL;
1771         nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1772         while (nr_pages > 0) {
1773                 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1774                 if (!lp) {
1775                         error = -ENOMEM;
1776                         goto Free;
1777                 }
1778                 if (!swsusp_page_is_free(virt_to_page(lp))) {
1779                         /* The page is "safe", add it to the list */
1780                         lp->next = safe_pages_list;
1781                         safe_pages_list = lp;
1782                 }
1783                 /* Mark the page as allocated */
1784                 swsusp_set_page_forbidden(virt_to_page(lp));
1785                 swsusp_set_page_free(virt_to_page(lp));
1786                 nr_pages--;
1787         }
1788         /* Free the reserved safe pages so that chain_alloc() can use them */
1789         while (sp_list) {
1790                 lp = sp_list->next;
1791                 free_image_page(sp_list, PG_UNSAFE_CLEAR);
1792                 sp_list = lp;
1793         }
1794         return 0;
1795
1796  Free:
1797         swsusp_free();
1798         return error;
1799 }
1800
1801 /**
1802  *      get_buffer - compute the address that snapshot_write_next() should
1803  *      set for its caller to write to.
1804  */
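/* Returns an ERR_PTR()-encoded error when the bitmap has been exhausted or
 * when no memory is left for a new PBE; callers check the result with
 * IS_ERR().
 */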
1805
1806 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1807 {
1808         struct pbe *pbe;
1809         struct page *page;
1810         unsigned long pfn = memory_bm_next_pfn(bm);
1811
1812         if (pfn == BM_END_OF_MAP)
1813                 return ERR_PTR(-EFAULT);
1814
1815         page = pfn_to_page(pfn);
1816         if (PageHighMem(page))
1817                 return get_highmem_page_buffer(page, ca);
1818
1819         if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
1820                 /* We have allocated the "original" page frame and we can
1821                  * use it directly to store the loaded page.
1822                  */
1823                 return page_address(page);
1824
1825         /* The "original" page frame has not been allocated and we have to
1826          * use a "safe" page frame to store the loaded page.
1827          */
1828         pbe = chain_alloc(ca, sizeof(struct pbe));
1829         if (!pbe) {
1830                 swsusp_free();
1831                 return ERR_PTR(-ENOMEM);
1832         }
1833         pbe->orig_address = page_address(page);
1834         pbe->address = safe_pages_list;
1835         safe_pages_list = safe_pages_list->next;
1836         pbe->next = restore_pblist;
1837         restore_pblist = pbe;
1838         return pbe->address;
1839 }
1840
1841 /**
1842  *      snapshot_write_next - used for writing the system memory snapshot.
1843  *
1844  *      On the first call to it @handle should point to a zeroed
1845  *      snapshot_handle structure.  The structure gets updated and a pointer
1846  *      to it should be passed to this function on every subsequent call.
1847  *
1848  *      The @count parameter should contain the number of bytes the caller
1849  *      wants to write to the image.  It must not be zero.
1850  *
1851  *      On success the function returns a positive number.  Then, the caller
1852  *      is allowed to write up to the returned number of bytes to the memory
1853  *      location computed by the data_of() macro.  The number returned
1854  *      may be smaller than @count, but this happens only if the write would
1855  *      otherwise cross a page boundary.
1856  *
1857  *      The function returns 0 to indicate the "end of file" condition,
1858  *      and a negative number is returned on error.  In such cases the
1859  *      structure pointed to by @handle is not updated and should not be used
1860  *      any more.
1861  */
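/* Minimal caller sketch (illustrative only; read_image_bytes() is a
 * hypothetical stand-in for the real image reader, everything else is the
 * interface described above):
 *
 *	int n, err = 0;
 *
 *	while ((n = snapshot_write_next(handle, PAGE_SIZE)) > 0) {
 *		err = read_image_bytes(data_of(*handle), n);
 *		if (err)
 *			break;
 *	}
 *	snapshot_write_finalize(handle);
 *	if (!err && (n < 0 || !snapshot_image_loaded(handle)))
 *		err = (n < 0) ? n : -ENODATA;
 */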
1862
1863 int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1864 {
1865         static struct chain_allocator ca;
1866         int error = 0;
1867
1868         /* Check if we have already loaded the entire image */
1869         if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1870                 return 0;
1871
1872         if (handle->offset == 0) {
1873                 if (!buffer)
1874                         /* This makes the buffer be freed by swsusp_free() */
1875                         buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1876
1877                 if (!buffer)
1878                         return -ENOMEM;
1879
1880                 handle->buffer = buffer;
1881         }
1882         handle->sync_read = 1;
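        /* Image layout driving this state machine (as unpacked here): page 0
         * is the header, pages 1 .. nr_meta_pages carry the packed original
         * PFNs, and the remaining nr_copy_pages pages carry the saved data.
         */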
1883         if (handle->prev < handle->cur) {
1884                 if (handle->prev == 0) {
1885                         error = load_header(buffer);
1886                         if (error)
1887                                 return error;
1888
1889                         error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1890                         if (error)
1891                                 return error;
1892
1893                 } else if (handle->prev <= nr_meta_pages) {
1894                         error = unpack_orig_pfns(buffer, &copy_bm);
1895                         if (error)
1896                                 return error;
1897
1898                         if (handle->prev == nr_meta_pages) {
1899                                 error = prepare_image(&orig_bm, &copy_bm);
1900                                 if (error)
1901                                         return error;
1902
1903                                 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
1904                                 memory_bm_position_reset(&orig_bm);
1905                                 restore_pblist = NULL;
1906                                 handle->buffer = get_buffer(&orig_bm, &ca);
1907                                 handle->sync_read = 0;
1908                                 if (IS_ERR(handle->buffer))
1909                                         return PTR_ERR(handle->buffer);
1910                         }
1911                 } else {
1912                         copy_last_highmem_page();
1913                         handle->buffer = get_buffer(&orig_bm, &ca);
1914                         if (IS_ERR(handle->buffer))
1915                                 return PTR_ERR(handle->buffer);
1916                         if (handle->buffer != buffer)
1917                                 handle->sync_read = 0;
1918                 }
1919                 handle->prev = handle->cur;
1920         }
1921         handle->buf_offset = handle->cur_offset;
1922         if (handle->cur_offset + count >= PAGE_SIZE) {
1923                 count = PAGE_SIZE - handle->cur_offset;
1924                 handle->cur_offset = 0;
1925                 handle->cur++;
1926         } else {
1927                 handle->cur_offset += count;
1928         }
1929         handle->offset += count;
1930         return count;
1931 }
1932
1933 /**
1934  *      snapshot_write_finalize - must be called after the last call to
1935  *      snapshot_write_next() in case the last page in the image happens
1936  *      to be a highmem page and its contents should be stored in
1937  *      highmem.  Additionally, it releases the memory that will not be
1938  *      used any more.
1939  */
1940
1941 void snapshot_write_finalize(struct snapshot_handle *handle)
1942 {
1943         copy_last_highmem_page();
1944         /* Free only if we have loaded the image entirely */
1945         if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1946                 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1947                 free_highmem_data();
1948         }
1949 }
1950
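/* Nonzero only if the whole image (all meta data and data pages) has been
 * fed to snapshot_write_next() and the last highmem page, if any, has
 * already been copied to its destination.
 */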
1951 int snapshot_image_loaded(struct snapshot_handle *handle)
1952 {
1953         return !(!nr_copy_pages || !last_highmem_page_copied() ||
1954                         handle->cur <= nr_meta_pages + nr_copy_pages);
1955 }
1956
1957 #ifdef CONFIG_HIGHMEM
1958 /* Assumes that @buf is ready and points to a "safe" page */
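/* Swaps the contents of @p1 and @p2 through @buf: buf <- p1, p1 <- p2,
 * p2 <- buf.
 */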
1959 static inline void
1960 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1961 {
1962         void *kaddr1, *kaddr2;
1963
1964         kaddr1 = kmap_atomic(p1, KM_USER0);
1965         kaddr2 = kmap_atomic(p2, KM_USER1);
1966         memcpy(buf, kaddr1, PAGE_SIZE);
1967         memcpy(kaddr1, kaddr2, PAGE_SIZE);
1968         memcpy(kaddr2, buf, PAGE_SIZE);
1969         kunmap_atomic(kaddr1, KM_USER0);
1970         kunmap_atomic(kaddr2, KM_USER1);
1971 }
1972
1973 /**
1974  *      restore_highmem - for each highmem page that was allocated before
1975  *      the suspend and included in the suspend image, and also has been
1976  *      allocated by the "resume" kernel swap its current (ie. "before
1977  *      resume") contents with the previous (ie. "before suspend") one.
1978  *
1979  *      If the resume eventually fails, we can call this function once
1980  *      again and restore the "before resume" highmem state.
1981  */
1982
1983 int restore_highmem(void)
1984 {
1985         struct highmem_pbe *pbe = highmem_pblist;
1986         void *buf;
1987
1988         if (!pbe)
1989                 return 0;
1990
1991         buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1992         if (!buf)
1993                 return -ENOMEM;
1994
1995         while (pbe) {
1996                 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1997                 pbe = pbe->next;
1998         }
1999         free_image_page(buf, PG_UNSAFE_CLEAR);
2000         return 0;
2001 }
2002 #endif /* CONFIG_HIGHMEM */