X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fkexec.c;h=474a84715eaca2936b51ad5a6c46fb4774751c5b;hb=3374cd1abd478f767aaedf2c21d109596ff0fe72;hp=a0411b3bd54a58d0df0c7ee3036b9a6c37871933;hpb=50cccc699ed849d31c9e3f7643db33edade20e4e;p=safe%2Fjmp%2Flinux-2.6

diff --git a/kernel/kexec.c b/kernel/kexec.c
index a0411b3..474a847 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,23 +6,48 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/kexec.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/highmem.h>
 #include <linux/syscalls.h>
 #include <linux/reboot.h>
-#include <linux/syscalls.h>
 #include <linux/ioport.h>
+#include <linux/hardirq.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/numa.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
+#include <linux/freezer.h>
+#include <linux/pm.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/kmsg_dump.h>
+
 #include <asm/page.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/semaphore.h>
+#include <asm/sections.h>
+
+/* Per cpu memory for storing cpu states in case of system crash. */
+note_buf_t __percpu *crash_notes;
+
+/* vmcoreinfo: raw data buffer, note buffer, and size bookkeeping */
+static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
+u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+size_t vmcoreinfo_size;
+size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
@@ -32,6 +57,13 @@ struct resource crashk_res = {
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
+int kexec_should_crash(struct task_struct *p)
+{
+	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
+		return 1;
+	return 0;
+}
+
 /*
  * When kexec transitions to the new kernel there is a one-to-one
  * mapping between physical and virtual addresses.  On processors
@@ -48,7 +80,7 @@ struct resource crashk_res = {
  *
  * The code for the transition from the current kernel to the
  * the new kernel is placed in the control_code_buffer, whose size
- * is given by KEXEC_CONTROL_CODE_SIZE.  In the best case only a single
+ * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
  * page of memory is necessary, but some architectures require more.
  * Because this memory must be identity mapped in the transition from
  * virtual to physical addresses it must live in the range
@@ -78,12 +110,15 @@ struct resource crashk_res = {
  */
 #define KIMAGE_NO_DEST (-1UL)
 
-static int kimage_is_destination_range(
-	struct kimage *image, unsigned long start, unsigned long end);
-static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest);
+static int kimage_is_destination_range(struct kimage *image,
+				       unsigned long start, unsigned long end);
+static struct page *kimage_alloc_page(struct kimage *image,
+				       gfp_t gfp_mask,
+				       unsigned long dest);
 
 static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments)
+	                    unsigned long nr_segments,
+                            struct kexec_segment __user *segments)
 {
 	size_t segment_bytes;
 	struct kimage *image;
@@ -92,11 +127,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 
 	/* Allocate a controlling structure */
 	result = -ENOMEM;
-	image = kmalloc(sizeof(*image), GFP_KERNEL);
-	if (!image) {
+	image = kzalloc(sizeof(*image), GFP_KERNEL);
+	if (!image)
 		goto out;
-	}
-	memset(image, 0, sizeof(*image));
+
 	image->head = 0;
 	image->entry = &image->head;
 	image->last_entry = &image->head;
@@ -136,6 +170,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 	result = -EADDRNOTAVAIL;
 	for (i = 0; i < nr_segments; i++) {
 		unsigned long mstart, mend;
+
 		mstart = image->segment[i].mem;
 		mend   = mstart + image->segment[i].memsz;
 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
@@ -150,12 +185,13 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 	 * easy explanation as one segment stops on another.
 	 */
 	result = -EINVAL;
-	for(i = 0; i < nr_segments; i++) {
+	for (i = 0; i < nr_segments; i++) {
 		unsigned long mstart, mend;
 		unsigned long j;
+
 		mstart = image->segment[i].mem;
 		mend   = mstart + image->segment[i].memsz;
-		for(j = 0; j < i; j++) {
+		for (j = 0; j < i; j++) {
 			unsigned long pstart, pend;
 			pstart = image->segment[j].mem;
 			pend   = pstart + image->segment[j].memsz;
@@ -171,25 +207,25 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
 	 * later on.
 	 */
 	result = -EINVAL;
-	for(i = 0; i < nr_segments; i++) {
+	for (i = 0; i < nr_segments; i++) {
 		if (image->segment[i].bufsz > image->segment[i].memsz)
 			goto out;
 	}
 
-
 	result = 0;
- out:
-	if (result == 0) {
+out:
+	if (result == 0)
 		*rimage = image;
-	} else {
+	else
 		kfree(image);
-	}
+
 	return result;
 
 }
 
 static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments)
+				unsigned long nr_segments,
+				struct kexec_segment __user *segments)
 {
 	int result;
 	struct kimage *image;
@@ -197,9 +233,9 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
 	/* Allocate and initialize a controlling structure */
 	image = NULL;
 	result = do_kimage_alloc(&image, entry, nr_segments, segments);
-	if (result) {
+	if (result)
 		goto out;
-	}
+
 	*rimage = image;
 
 	/*
@@ -209,24 +245,31 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
 	 */
 	result = -ENOMEM;
 	image->control_code_page = kimage_alloc_control_pages(image,
-		get_order(KEXEC_CONTROL_CODE_SIZE));
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
 	if (!image->control_code_page) {
 		printk(KERN_ERR "Could not allocate control_code_buffer\n");
 		goto out;
 	}
 
+	image->swap_page = kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		printk(KERN_ERR "Could not allocate swap buffer\n");
+		goto out;
+	}
+
 	result = 0;
  out:
-	if (result == 0) {
+	if (result == 0)
 		*rimage = image;
-	} else {
+	else
 		kfree(image);
-	}
+
 	return result;
 }
 
 static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment *segments)
+				unsigned long nr_segments,
+				struct kexec_segment __user *segments)
 {
 	int result;
 	struct kimage *image;
@@ -241,9 +284,8 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
 
 	/* Allocate and initialize a controlling structure */
 	result = do_kimage_alloc(&image, entry, nr_segments, segments);
-	if (result) {
+	if (result)
 		goto out;
-	}
 
 	/* Enable the special crash kernel control page
 	 * allocation policy.
@@ -263,6 +305,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
 	result = -EADDRNOTAVAIL;
 	for (i = 0; i < nr_segments; i++) {
 		unsigned long mstart, mend;
+
 		mstart = image->segment[i].mem;
 		mend = mstart + image->segment[i].memsz - 1;
 		/* Ensure we are within the crash kernel limits */
@@ -270,7 +313,6 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
 			goto out;
 	}
 
-
 	/*
 	 * Find a location for the control code buffer, and add
 	 * the vector of segments so that it's pages will also be
@@ -278,80 +320,83 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
 	 */
 	result = -ENOMEM;
 	image->control_code_page = kimage_alloc_control_pages(image,
-		get_order(KEXEC_CONTROL_CODE_SIZE));
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
 	if (!image->control_code_page) {
 		printk(KERN_ERR "Could not allocate control_code_buffer\n");
 		goto out;
 	}
 
 	result = 0;
- out:
-	if (result == 0) {
+out:
+	if (result == 0)
 		*rimage = image;
-	} else {
+	else
 		kfree(image);
-	}
+
 	return result;
 }
 
-static int kimage_is_destination_range(
-	struct kimage *image, unsigned long start, unsigned long end)
+static int kimage_is_destination_range(struct kimage *image,
+					unsigned long start,
+					unsigned long end)
 {
 	unsigned long i;
 
 	for (i = 0; i < image->nr_segments; i++) {
 		unsigned long mstart, mend;
+
 		mstart = image->segment[i].mem;
-		mend   = mstart + image->segment[i].memsz;
-		if ((end > mstart) && (start < mend)) {
+		mend = mstart + image->segment[i].memsz;
+		if ((end > mstart) && (start < mend))
 			return 1;
-		}
 	}
+
 	return 0;
 }
 
-static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order)
+static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *pages;
+
 	pages = alloc_pages(gfp_mask, order);
 	if (pages) {
 		unsigned int count, i;
 		pages->mapping = NULL;
-		pages->private = order;
+		set_page_private(pages, order);
 		count = 1 << order;
-		for(i = 0; i < count; i++) {
+		for (i = 0; i < count; i++)
 			SetPageReserved(pages + i);
-		}
 	}
+
 	return pages;
 }
 
 static void kimage_free_pages(struct page *page)
 {
 	unsigned int order, count, i;
-	order = page->private;
+
+	order = page_private(page);
 	count = 1 << order;
-	for(i = 0; i < count; i++) {
+	for (i = 0; i < count; i++)
 		ClearPageReserved(page + i);
-	}
 	__free_pages(page, order);
 }
 
 static void kimage_free_page_list(struct list_head *list)
 {
 	struct list_head *pos, *next;
+
 	list_for_each_safe(pos, next, list) {
 		struct page *page;
 
 		page = list_entry(pos, struct page, lru);
 		list_del(&page->lru);
-
 		kimage_free_pages(page);
 	}
 }
 
-static struct page *kimage_alloc_normal_control_pages(
-	struct kimage *image, unsigned int order)
+static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
+							unsigned int order)
 {
 	/* Control pages are special, they are the intermediaries
 	 * that are needed while we copy the rest of the pages
@@ -378,6 +423,7 @@ static struct page *kimage_alloc_normal_control_pages(
 	 */
 	do {
 		unsigned long pfn, epfn, addr, eaddr;
+
 		pages = kimage_alloc_pages(GFP_KERNEL, order);
 		if (!pages)
 			break;
@@ -386,12 +432,12 @@ static struct page *kimage_alloc_normal_control_pages(
 		addr  = pfn << PAGE_SHIFT;
 		eaddr = epfn << PAGE_SHIFT;
 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
-			kimage_is_destination_range(image, addr, eaddr))
-		{
+			      kimage_is_destination_range(image, addr, eaddr)) {
 			list_add(&pages->lru, &extra_pages);
 			pages = NULL;
 		}
-	} while(!pages);
+	} while (!pages);
+
 	if (pages) {
 		/* Remember the allocated page... */
 		list_add(&pages->lru, &image->control_pages);
@@ -411,12 +457,12 @@ static struct page *kimage_alloc_normal_control_pages(
 	 * For now it is simpler to just free the pages.
 	 */
 	kimage_free_page_list(&extra_pages);
-	return pages;
 
+	return pages;
 }
 
-static struct page *kimage_alloc_crash_control_pages(
-	struct kimage *image, unsigned int order)
+static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
+						      unsigned int order)
 {
 	/* Control pages are special, they are the intermediaries
 	 * that are needed while we copy the rest of the pages
@@ -441,21 +487,22 @@ static struct page *kimage_alloc_crash_control_pages(
 	 */
 	unsigned long hole_start, hole_end, size;
 	struct page *pages;
+
 	pages = NULL;
 	size = (1 << order) << PAGE_SHIFT;
 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
 	hole_end   = hole_start + size - 1;
-	while(hole_end <= crashk_res.end) {
+	while (hole_end <= crashk_res.end) {
 		unsigned long i;
-		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) {
+
+		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
 			break;
-		}
-		if (hole_end > crashk_res.end) {
+		if (hole_end > crashk_res.end)
 			break;
-		}
 		/* See if I overlap any of the segments */
-		for(i = 0; i < image->nr_segments; i++) {
+		for (i = 0; i < image->nr_segments; i++) {
 			unsigned long mstart, mend;
+
 			mstart = image->segment[i].mem;
 			mend   = mstart + image->segment[i].memsz - 1;
 			if ((hole_end >= mstart) && (hole_start <= mend)) {
@@ -471,18 +518,19 @@ static struct page *kimage_alloc_crash_control_pages(
 			break;
 		}
 	}
-	if (pages) {
+	if (pages)
 		image->control_page = hole_end;
-	}
+
 	return pages;
 }
 
 
-struct page *kimage_alloc_control_pages(
-	struct kimage *image, unsigned int order)
+struct page *kimage_alloc_control_pages(struct kimage *image,
+					 unsigned int order)
 {
 	struct page *pages = NULL;
-	switch(image->type) {
+
+	switch (image->type) {
 	case KEXEC_TYPE_DEFAULT:
 		pages = kimage_alloc_normal_control_pages(image, order);
 		break;
@@ -490,43 +538,46 @@ struct page *kimage_alloc_control_pages(
 		pages = kimage_alloc_crash_control_pages(image, order);
 		break;
 	}
+
 	return pages;
 }
 
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
-	if (*image->entry != 0) {
+	if (*image->entry != 0)
 		image->entry++;
-	}
+
 	if (image->entry == image->last_entry) {
 		kimage_entry_t *ind_page;
 		struct page *page;
+
 		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
-		if (!page) {
+		if (!page)
 			return -ENOMEM;
-		}
+
 		ind_page = page_address(page);
 		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
 		image->entry = ind_page;
-		image->last_entry =
-			ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+		image->last_entry = ind_page +
+				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
 	}
 	*image->entry = entry;
 	image->entry++;
 	*image->entry = 0;
+
 	return 0;
 }
 
-static int kimage_set_destination(
-	struct kimage *image, unsigned long destination)
+static int kimage_set_destination(struct kimage *image,
+				   unsigned long destination)
 {
 	int result;
 
 	destination &= PAGE_MASK;
 	result = kimage_add_entry(image, destination | IND_DESTINATION);
-	if (result == 0) {
+	if (result == 0)
 		image->destination = destination;
-	}
+
 	return result;
 }
 
@@ -537,9 +588,9 @@ static int kimage_add_page(struct kimage *image, unsigned long page)
 
 	page &= PAGE_MASK;
 	result = kimage_add_entry(image, page | IND_SOURCE);
-	if (result == 0) {
+	if (result == 0)
 		image->destination += PAGE_SIZE;
-	}
+
 	return result;
 }
 
@@ -553,13 +604,12 @@ static void kimage_free_extra_pages(struct kimage *image)
 	kimage_free_page_list(&image->unuseable_pages);
 
 }
-static int kimage_terminate(struct kimage *image)
+static void kimage_terminate(struct kimage *image)
 {
-	if (*image->entry != 0) {
+	if (*image->entry != 0)
 		image->entry++;
-	}
+
 	*image->entry = IND_DONE;
-	return 0;
 }
 
 #define for_each_kimage_entry(image, ptr, entry) \
@@ -582,26 +632,24 @@ static void kimage_free(struct kimage *image)
 
 	if (!image)
 		return;
+
 	kimage_free_extra_pages(image);
 	for_each_kimage_entry(image, ptr, entry) {
 		if (entry & IND_INDIRECTION) {
 			/* Free the previous indirection page */
-			if (ind & IND_INDIRECTION) {
+			if (ind & IND_INDIRECTION)
 				kimage_free_entry(ind);
-			}
 			/* Save this indirection page until we are
 			 * done with it.
 			 */
 			ind = entry;
 		}
-		else if (entry & IND_SOURCE) {
+		else if (entry & IND_SOURCE)
 			kimage_free_entry(entry);
-		}
 	}
 	/* Free the final indirection page */
-	if (ind & IND_INDIRECTION) {
+	if (ind & IND_INDIRECTION)
 		kimage_free_entry(ind);
-	}
 
 	/* Handle any machine specific cleanup */
 	machine_kexec_cleanup(image);
@@ -611,26 +659,28 @@ static void kimage_free(struct kimage *image)
 	kfree(image);
 }
 
-static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page)
+static kimage_entry_t *kimage_dst_used(struct kimage *image,
+					unsigned long page)
 {
 	kimage_entry_t *ptr, entry;
 	unsigned long destination = 0;
 
 	for_each_kimage_entry(image, ptr, entry) {
-		if (entry & IND_DESTINATION) {
+		if (entry & IND_DESTINATION)
 			destination = entry & PAGE_MASK;
-		}
 		else if (entry & IND_SOURCE) {
-			if (page == destination) {
+			if (page == destination)
 				return ptr;
-			}
 			destination += PAGE_SIZE;
 		}
 	}
-	return 0;
+
+	return NULL;
 }
 
-static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination)
+static struct page *kimage_alloc_page(struct kimage *image,
+					gfp_t gfp_mask,
+					unsigned long destination)
 {
 	/*
 	 * Here we implement safeguards to ensure that a source page
@@ -670,11 +720,11 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas
 
 		/* Allocate a page, if we run out of memory give up */
 		page = kimage_alloc_pages(gfp_mask, 0);
-		if (!page) {
-			return 0;
-		}
+		if (!page)
+			return NULL;
 		/* If the page cannot be used file it away */
-		if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+		if (page_to_pfn(page) >
+				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
 			list_add(&page->lru, &image->unuseable_pages);
 			continue;
 		}
@@ -685,7 +735,8 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas
 			break;
 
 		/* If the page is not a destination page use it */
-		if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE))
+		if (!kimage_is_destination_range(image, addr,
+						  addr + PAGE_SIZE))
 			break;
 
 		/*
@@ -705,8 +756,14 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas
 			*old = addr | (*old & ~PAGE_MASK);
 
 			/* The old page I have found cannot be a
-			 * destination page, so return it.
+			 * destination page, so return it if its
+			 * gfp_flags honor the ones passed in.
 			 */
+			if (!(gfp_mask & __GFP_HIGHMEM) &&
+			    PageHighMem(old_page)) {
+				kimage_free_pages(old_page);
+				continue;
+			}
 			addr = old_addr;
 			page = old_page;
 			break;
@@ -718,16 +775,17 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mas
 			list_add(&page->lru, &image->dest_pages);
 		}
 	}
+
 	return page;
 }
 
 static int kimage_load_normal_segment(struct kimage *image,
-	struct kexec_segment *segment)
+					 struct kexec_segment *segment)
 {
 	unsigned long maddr;
 	unsigned long ubytes, mbytes;
 	int result;
-	unsigned char *buf;
+	unsigned char __user *buf;
 
 	result = 0;
 	buf = segment->buf;
@@ -736,34 +794,36 @@ static int kimage_load_normal_segment(struct kimage *image,
 	maddr = segment->mem;
 
 	result = kimage_set_destination(image, maddr);
-	if (result < 0) {
+	if (result < 0)
 		goto out;
-	}
-	while(mbytes) {
+
+	while (mbytes) {
 		struct page *page;
 		char *ptr;
 		size_t uchunk, mchunk;
+
 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
-		if (page == 0) {
+		if (!page) {
 			result  = -ENOMEM;
 			goto out;
 		}
-		result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT);
-		if (result < 0) {
+		result = kimage_add_page(image, page_to_pfn(page)
+								<< PAGE_SHIFT);
+		if (result < 0)
 			goto out;
-		}
+
 		ptr = kmap(page);
 		/* Start with a clear page */
 		memset(ptr, 0, PAGE_SIZE);
 		ptr += maddr & ~PAGE_MASK;
 		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
-		if (mchunk > mbytes) {
+		if (mchunk > mbytes)
 			mchunk = mbytes;
-		}
+
 		uchunk = mchunk;
-		if (uchunk > ubytes) {
+		if (uchunk > ubytes)
 			uchunk = ubytes;
-		}
+
 		result = copy_from_user(ptr, buf, uchunk);
 		kunmap(page);
 		if (result) {
@@ -775,12 +835,12 @@ static int kimage_load_normal_segment(struct kimage *image,
 		buf    += mchunk;
 		mbytes -= mchunk;
 	}
- out:
+out:
 	return result;
 }
 
 static int kimage_load_crash_segment(struct kimage *image,
-	struct kexec_segment *segment)
+					struct kexec_segment *segment)
 {
 	/* For crash dumps kernels we simply copy the data from
 	 * user space to it's destination.
@@ -789,28 +849,29 @@ static int kimage_load_crash_segment(struct kimage *image,
 	unsigned long maddr;
 	unsigned long ubytes, mbytes;
 	int result;
-	unsigned char *buf;
+	unsigned char __user *buf;
 
 	result = 0;
 	buf = segment->buf;
 	ubytes = segment->bufsz;
 	mbytes = segment->memsz;
 	maddr = segment->mem;
-	while(mbytes) {
+	while (mbytes) {
 		struct page *page;
 		char *ptr;
 		size_t uchunk, mchunk;
+
 		page = pfn_to_page(maddr >> PAGE_SHIFT);
-		if (page == 0) {
+		if (!page) {
 			result  = -ENOMEM;
 			goto out;
 		}
 		ptr = kmap(page);
 		ptr += maddr & ~PAGE_MASK;
 		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
-		if (mchunk > mbytes) {
+		if (mchunk > mbytes)
 			mchunk = mbytes;
-		}
+
 		uchunk = mchunk;
 		if (uchunk > ubytes) {
 			uchunk = ubytes;
@@ -818,6 +879,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 			memset(ptr + uchunk, 0, mchunk - uchunk);
 		}
 		result = copy_from_user(ptr, buf, uchunk);
+		kexec_flush_icache_page(page);
 		kunmap(page);
 		if (result) {
 			result = (result < 0) ? result : -EIO;
@@ -828,15 +890,16 @@ static int kimage_load_crash_segment(struct kimage *image,
 		buf    += mchunk;
 		mbytes -= mchunk;
 	}
- out:
+out:
 	return result;
 }
 
 static int kimage_load_segment(struct kimage *image,
-	struct kexec_segment *segment)
+				struct kexec_segment *segment)
 {
 	int result = -ENOMEM;
-	switch(image->type) {
+
+	switch (image->type) {
 	case KEXEC_TYPE_DEFAULT:
 		result = kimage_load_normal_segment(image, segment);
 		break;
@@ -844,6 +907,7 @@ static int kimage_load_segment(struct kimage *image,
 		result = kimage_load_crash_segment(image, segment);
 		break;
 	}
+
 	return result;
 }
 
@@ -867,21 +931,15 @@ static int kimage_load_segment(struct kimage *image,
  * kexec does not sync, or unmount filesystems so if you need
  * that to happen you need to do that yourself.
  */
-struct kimage *kexec_image = NULL;
-static struct kimage *kexec_crash_image = NULL;
-/*
- * A home grown binary mutex.
- * Nothing can wait so this mutex is safe to use
- * in interrupt context :)
- */
-static int kexec_lock = 0;
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
+
+static DEFINE_MUTEX(kexec_mutex);
 
-asmlinkage long sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments,
-	unsigned long flags)
+SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
+		struct kexec_segment __user *, segments, unsigned long, flags)
 {
 	struct kimage **dest_image, *image;
-	int locked;
 	int result;
 
 	/* We only trust the superuser with rebooting the system. */
@@ -898,9 +956,7 @@ asmlinkage long sys_kexec_load(unsigned long entry,
 	/* Verify we are on the appropriate architecture */
 	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
 		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
-	{
 		return -EINVAL;
-	}
 
 	/* Put an artificial cap on the number
 	 * of segments passed to kexec_load.
@@ -919,59 +975,59 @@ asmlinkage long sys_kexec_load(unsigned long entry,
 	 *
 	 * KISS: always take the mutex.
 	 */
-	locked = xchg(&kexec_lock, 1);
-	if (locked) {
+	if (!mutex_trylock(&kexec_mutex))
 		return -EBUSY;
-	}
+
 	dest_image = &kexec_image;
-	if (flags & KEXEC_ON_CRASH) {
+	if (flags & KEXEC_ON_CRASH)
 		dest_image = &kexec_crash_image;
-	}
 	if (nr_segments > 0) {
 		unsigned long i;
+
 		/* Loading another kernel to reboot into */
-		if ((flags & KEXEC_ON_CRASH) == 0) {
-			result = kimage_normal_alloc(&image, entry, nr_segments, segments);
-		}
+		if ((flags & KEXEC_ON_CRASH) == 0)
+			result = kimage_normal_alloc(&image, entry,
+							nr_segments, segments);
 		/* Loading another kernel to switch to if this one crashes */
 		else if (flags & KEXEC_ON_CRASH) {
 			/* Free any current crash dump kernel before
 			 * we corrupt it.
 			 */
 			kimage_free(xchg(&kexec_crash_image, NULL));
-			result = kimage_crash_alloc(&image, entry, nr_segments, segments);
+			result = kimage_crash_alloc(&image, entry,
+						     nr_segments, segments);
 		}
-		if (result) {
+		if (result)
 			goto out;
-		}
+
+		if (flags & KEXEC_PRESERVE_CONTEXT)
+			image->preserve_context = 1;
 		result = machine_kexec_prepare(image);
-		if (result) {
+		if (result)
 			goto out;
-		}
-		for(i = 0; i < nr_segments; i++) {
+
+		for (i = 0; i < nr_segments; i++) {
 			result = kimage_load_segment(image, &image->segment[i]);
-			if (result) {
+			if (result)
 				goto out;
-			}
-		}
-		result = kimage_terminate(image);
-		if (result) {
-			goto out;
 		}
+		kimage_terminate(image);
 	}
 	/* Install the new kernel, and  Uninstall the old */
 	image = xchg(dest_image, image);
 
- out:
-	xchg(&kexec_lock, 0); /* Release the mutex */
+out:
+	mutex_unlock(&kexec_mutex);
 	kimage_free(image);
+
 	return result;
 }
 
 #ifdef CONFIG_COMPAT
 asmlinkage long compat_sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct compat_kexec_segment __user *segments,
-	unsigned long flags)
+				unsigned long nr_segments,
+				struct compat_kexec_segment __user *segments,
+				unsigned long flags)
 {
 	struct compat_kexec_segment in;
 	struct kexec_segment out, __user *ksegments;
@@ -980,20 +1036,17 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
 	/* Don't allow clients that don't understand the native
 	 * architecture to do anything.
 	 */
-	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) {
+	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
 		return -EINVAL;
-	}
 
-	if (nr_segments > KEXEC_SEGMENT_MAX) {
+	if (nr_segments > KEXEC_SEGMENT_MAX)
 		return -EINVAL;
-	}
 
 	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
 	for (i=0; i < nr_segments; i++) {
 		result = copy_from_user(&in, &segments[i], sizeof(in));
-		if (result) {
+		if (result)
 			return -EFAULT;
-		}
 
 		out.buf   = compat_ptr(in.buf);
 		out.bufsz = in.bufsz;
@@ -1001,22 +1054,17 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
 		out.memsz = in.memsz;
 
 		result = copy_to_user(&ksegments[i], &out, sizeof(out));
-		if (result) {
+		if (result)
 			return -EFAULT;
-		}
 	}
 
 	return sys_kexec_load(entry, nr_segments, ksegments, flags);
 }
 #endif
 
-void crash_kexec(void)
+void crash_kexec(struct pt_regs *regs)
 {
-	struct kimage *image;
-	int locked;
-
-
-	/* Take the kexec_lock here to prevent sys_kexec_load
+	/* Take the kexec_mutex here to prevent sys_kexec_load
 	 * running on one cpu from replacing the crash kernel
 	 * we are using after a panic on a different cpu.
 	 *
@@ -1024,13 +1072,494 @@ void crash_kexec(void)
 	 * of memory the xchg(&kexec_crash_image) would be
 	 * sufficient.  But since I reuse the memory...
 	 */
-	locked = xchg(&kexec_lock, 1);
-	if (!locked) {
-		image = xchg(&kexec_crash_image, NULL);
-		if (image) {
-			machine_crash_shutdown();
-			machine_kexec(image);
+	if (mutex_trylock(&kexec_mutex)) {
+		if (kexec_crash_image) {
+			struct pt_regs fixed_regs;
+
+			kmsg_dump(KMSG_DUMP_KEXEC);
+
+			crash_setup_regs(&fixed_regs, regs);
+			crash_save_vmcoreinfo();
+			machine_crash_shutdown(&fixed_regs);
+			machine_kexec(kexec_crash_image);
 		}
-		xchg(&kexec_lock, 0);
+		mutex_unlock(&kexec_mutex);
 	}
 }
+
+size_t crash_get_memory_size(void)
+{
+	size_t size;
+	mutex_lock(&kexec_mutex);
+	size = crashk_res.end - crashk_res.start + 1;
+	mutex_unlock(&kexec_mutex);
+	return size;
+}
+
+static void free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+		init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+		free_page((unsigned long)__va(addr));
+		totalram_pages++;
+	}
+}
+
+int crash_shrink_memory(unsigned long new_size)
+{
+	int ret = 0;
+	unsigned long start, end;
+
+	mutex_lock(&kexec_mutex);
+
+	if (kexec_crash_image) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+	start = crashk_res.start;
+	end = crashk_res.end;
+
+	if (new_size >= end - start + 1) {
+		ret = -EINVAL;
+		if (new_size == end - start + 1)
+			ret = 0;
+		goto unlock;
+	}
+
+	start = roundup(start, PAGE_SIZE);
+	end = roundup(start + new_size, PAGE_SIZE);
+
+	free_reserved_phys_range(end, crashk_res.end);
+
+	if (start == end)
+		release_resource(&crashk_res);
+	crashk_res.end = end - 1;
+
+unlock:
+	mutex_unlock(&kexec_mutex);
+	return ret;
+}
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+			    size_t data_len)
+{
+	struct elf_note note;
+
+	note.n_namesz = strlen(name) + 1;
+	note.n_descsz = data_len;
+	note.n_type   = type;
+	memcpy(buf, &note, sizeof(note));
+	buf += (sizeof(note) + 3)/4;
+	memcpy(buf, name, note.n_namesz);
+	buf += (note.n_namesz + 3)/4;
+	memcpy(buf, data, note.n_descsz);
+	buf += (note.n_descsz + 3)/4;
+
+	return buf;
+}
+
+static void final_note(u32 *buf)
+{
+	struct elf_note note;
+
+	note.n_namesz = 0;
+	note.n_descsz = 0;
+	note.n_type   = 0;
+	memcpy(buf, &note, sizeof(note));
+}
+
+void crash_save_cpu(struct pt_regs *regs, int cpu)
+{
+	struct elf_prstatus prstatus;
+	u32 *buf;
+
+	if ((cpu < 0) || (cpu >= nr_cpu_ids))
+		return;
+
+	/* Using ELF notes here is opportunistic.
+	 * I need a well defined structure format
+	 * for the data I pass, and I need tags
+	 * on the data to indicate what information I have
+	 * squirrelled away.  ELF notes happen to provide
+	 * all of that, so there is no need to invent something new.
+	 */
+	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	memset(&prstatus, 0, sizeof(prstatus));
+	prstatus.pr_pid = current->pid;
+	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+		      	      &prstatus, sizeof(prstatus));
+	final_note(buf);
+}
+
+static int __init crash_notes_memory_init(void)
+{
+	/* Allocate memory for saving cpu registers. */
+	crash_notes = alloc_percpu(note_buf_t);
+	if (!crash_notes) {
+		printk("Kexec: Memory allocation for saving cpu register"
+		" states failed\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+module_init(crash_notes_memory_init)
+
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ *   crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char 			*cmdline,
+					unsigned long long	system_ram,
+					unsigned long long	*crash_size,
+					unsigned long long	*crash_base)
+{
+	char *cur = cmdline, *tmp;
+
+	/* for each entry of the comma-separated list */
+	do {
+		unsigned long long start, end = ULLONG_MAX, size;
+
+		/* get the start of the range */
+		start = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("crashkernel: Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (*cur != '-') {
+			pr_warning("crashkernel: '-' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		/* if no ':' is here, then we read the end */
+		if (*cur != ':') {
+			end = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("crashkernel: Memory "
+						"value expected\n");
+				return -EINVAL;
+			}
+			cur = tmp;
+			if (end <= start) {
+				pr_warning("crashkernel: end <= start\n");
+				return -EINVAL;
+			}
+		}
+
+		if (*cur != ':') {
+			pr_warning("crashkernel: ':' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		size = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (size >= system_ram) {
+			pr_warning("crashkernel: invalid size\n");
+			return -EINVAL;
+		}
+
+		/* match ? */
+		if (system_ram >= start && system_ram < end) {
+			*crash_size = size;
+			break;
+		}
+	} while (*cur++ == ',');
+
+	if (*crash_size > 0) {
+		while (*cur && *cur != ' ' && *cur != '@')
+			cur++;
+		if (*cur == '@') {
+			cur++;
+			*crash_base = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("Memory value expected "
+						"after '@'\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * This function parses "simple" (old) crashkernel command lines like
+ *
+ * 	crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char 		*cmdline,
+					   unsigned long long 	*crash_size,
+					   unsigned long long 	*crash_base)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warning("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	if (*cur == '@')
+		*crash_base = memparse(cur+1, &cur);
+
+	return 0;
+}
+
+/*
+ * This function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
+int __init parse_crashkernel(char 		 *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	char 	*p = cmdline, *ck_cmdline = NULL;
+	char	*first_colon, *first_space;
+
+	BUG_ON(!crash_size || !crash_base);
+	*crash_size = 0;
+	*crash_base = 0;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, "crashkernel=");
+	while (p) {
+		ck_cmdline = p;
+		p = strstr(p+1, "crashkernel=");
+	}
+
+	if (!ck_cmdline)
+		return -EINVAL;
+
+	ck_cmdline += 12; /* strlen("crashkernel=") */
+
+	/*
+	 * if the commandline contains a ':', then that's the extended
+	 * syntax -- if not, it must be the classic syntax
+	 */
+	first_colon = strchr(ck_cmdline, ':');
+	first_space = strchr(ck_cmdline, ' ');
+	if (first_colon && (!first_space || first_colon < first_space))
+		return parse_crashkernel_mem(ck_cmdline, system_ram,
+				crash_size, crash_base);
+	else
+		return parse_crashkernel_simple(ck_cmdline, crash_size,
+				crash_base);
+
+	return 0;
+}
+
+
+
+void crash_save_vmcoreinfo(void)
+{
+	u32 *buf;
+
+	if (!vmcoreinfo_size)
+		return;
+
+	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
+
+	buf = (u32 *)vmcoreinfo_note;
+
+	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
+			      vmcoreinfo_size);
+
+	final_note(buf);
+}
+
+void vmcoreinfo_append_str(const char *fmt, ...)
+{
+	va_list args;
+	char buf[0x50];	/* one formatted entry; longer output is truncated */
+	int r;
+
+	va_start(args, fmt);
+	/* vscnprintf() returns the bytes actually stored, never > sizeof(buf) */
+	r = vscnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	/* drop whatever does not fit in the remaining vmcoreinfo_data space */
+	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
+		r = vmcoreinfo_max_size - vmcoreinfo_size;
+
+	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
+	vmcoreinfo_size += r;
+}
+
+/*
+ * provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
+{}
+
+unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
+{
+	return __pa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
+static int __init crash_save_vmcoreinfo_init(void)
+{
+	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
+	VMCOREINFO_PAGESIZE(PAGE_SIZE);
+
+	VMCOREINFO_SYMBOL(init_uts_ns);
+	VMCOREINFO_SYMBOL(node_online_map);
+	VMCOREINFO_SYMBOL(swapper_pg_dir);
+	VMCOREINFO_SYMBOL(_stext);
+	VMCOREINFO_SYMBOL(vmlist);
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+	VMCOREINFO_SYMBOL(mem_map);
+	VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#ifdef CONFIG_SPARSEMEM
+	VMCOREINFO_SYMBOL(mem_section);
+	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
+	VMCOREINFO_STRUCT_SIZE(mem_section);
+	VMCOREINFO_OFFSET(mem_section, section_mem_map);
+#endif
+	VMCOREINFO_STRUCT_SIZE(page);
+	VMCOREINFO_STRUCT_SIZE(pglist_data);
+	VMCOREINFO_STRUCT_SIZE(zone);
+	VMCOREINFO_STRUCT_SIZE(free_area);
+	VMCOREINFO_STRUCT_SIZE(list_head);
+	VMCOREINFO_SIZE(nodemask_t);
+	VMCOREINFO_OFFSET(page, flags);
+	VMCOREINFO_OFFSET(page, _count);
+	VMCOREINFO_OFFSET(page, mapping);
+	VMCOREINFO_OFFSET(page, lru);
+	VMCOREINFO_OFFSET(pglist_data, node_zones);
+	VMCOREINFO_OFFSET(pglist_data, nr_zones);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
+#endif
+	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
+	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
+	VMCOREINFO_OFFSET(pglist_data, node_id);
+	VMCOREINFO_OFFSET(zone, free_area);
+	VMCOREINFO_OFFSET(zone, vm_stat);
+	VMCOREINFO_OFFSET(zone, spanned_pages);
+	VMCOREINFO_OFFSET(free_area, free_list);
+	VMCOREINFO_OFFSET(list_head, next);
+	VMCOREINFO_OFFSET(list_head, prev);
+	VMCOREINFO_OFFSET(vm_struct, addr);
+	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
+	log_buf_kexec_setup();
+	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
+	VMCOREINFO_NUMBER(NR_FREE_PAGES);
+	VMCOREINFO_NUMBER(PG_lru);
+	VMCOREINFO_NUMBER(PG_private);
+	VMCOREINFO_NUMBER(PG_swapcache);
+
+	arch_crash_save_vmcoreinfo();
+
+	return 0;
+}
+
+module_init(crash_save_vmcoreinfo_init)
+
+/*
+ * Move into place and start executing a preloaded standalone
+ * executable.  If nothing was preloaded return an error.
+ */
+int kernel_kexec(void)
+{
+	int error = 0;
+
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+	if (!kexec_image) {
+		error = -EINVAL;
+		goto Unlock;
+	}
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context) {
+		mutex_lock(&pm_mutex);
+		pm_prepare_console();
+		error = freeze_processes();
+		if (error) {
+			error = -EBUSY;
+			goto Restore_console;
+		}
+		suspend_console();
+		error = dpm_suspend_start(PMSG_FREEZE);
+		if (error)
+			goto Resume_console;
+		/* At this point, dpm_suspend_start() has been called,
+		 * but *not* dpm_suspend_noirq(). We *must* call
+		 * dpm_suspend_noirq() now.  Otherwise, drivers for
+		 * some devices (e.g. interrupt controllers) become
+		 * desynchronized with the actual state of the
+		 * hardware at resume time, and evil weirdness ensues.
+		 */
+		error = dpm_suspend_noirq(PMSG_FREEZE);
+		if (error)
+			goto Resume_devices;
+		error = disable_nonboot_cpus();
+		if (error)
+			goto Enable_cpus;
+		local_irq_disable();
+		/* Suspend system devices */
+		error = sysdev_suspend(PMSG_FREEZE);
+		if (error)
+			goto Enable_irqs;
+	} else
+#endif
+	{
+		kernel_restart_prepare(NULL);
+		printk(KERN_EMERG "Starting new kernel\n");
+		machine_shutdown();
+	}
+
+	machine_kexec(kexec_image);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context) {
+		sysdev_resume();
+ Enable_irqs:
+		local_irq_enable();
+ Enable_cpus:
+		enable_nonboot_cpus();
+		dpm_resume_noirq(PMSG_RESTORE);
+ Resume_devices:
+		dpm_resume_end(PMSG_RESTORE);
+ Resume_console:
+		resume_console();
+		thaw_processes();
+ Restore_console:
+		pm_restore_console();
+		mutex_unlock(&pm_mutex);
+	}
+#endif
+
+ Unlock:
+	mutex_unlock(&kexec_mutex);
+	return error;
+}