nfsd: move most of nfsfh.h to fs/nfsd
[safe/jmp/linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <asm/uaccess.h>
35 #include <asm/param.h>
36 #include <asm/page.h>
37
38 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
39 static int load_elf_library(struct file *);
40 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
41                                 int, int, unsigned long);
42
43 /*
44  * If we don't support core dumping, then supply a NULL so we
45  * don't even try.
46  */
47 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
48 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
49 #else
50 #define elf_core_dump   NULL
51 #endif
52
53 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
54 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
55 #else
56 #define ELF_MIN_ALIGN   PAGE_SIZE
57 #endif
58
59 #ifndef ELF_CORE_EFLAGS
60 #define ELF_CORE_EFLAGS 0
61 #endif
62
/*
 * Page arithmetic on ELF_MIN_ALIGN boundaries:
 *
 *   ELF_PAGESTART(_v)  - _v rounded down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET(_v) - offset of _v inside its ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN(_v)  - _v rounded up to an ELF_MIN_ALIGN boundary
 *
 * The rounding masks are widened to unsigned long *before* being inverted.
 * Without that, an unsigned int ELF_MIN_ALIGN would produce a 32-bit mask
 * that zero-extends and clears the upper half of a 64-bit address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
66
67 static struct linux_binfmt elf_format = {
68                 .module         = THIS_MODULE,
69                 .load_binary    = load_elf_binary,
70                 .load_shlib     = load_elf_library,
71                 .core_dump      = elf_core_dump,
72                 .min_coredump   = ELF_EXEC_PAGESIZE,
73                 .hasvdso        = 1
74 };
75
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Macros that make the stack manipulation below read the same whether the
 * stack grows up (CONFIG_STACK_GROWSUP) or down.
 *
 *   STACK_ADD(sp, items)   - 'sp' viewed as an elf_addr_t pointer, moved by
 *                            'items' slots in the growth direction
 *   STACK_ROUND(sp, items) - 'sp' moved by 'items' in the growth direction
 *                            and rounded to a 16-byte boundary in that same
 *                            direction; yields an unsigned long
 *   STACK_ALLOC(sp, len)   - moves the lvalue 'sp' by 'len' in place and
 *                            yields the lower end of the range just reserved
 *
 * Every argument is parenthesized so that compound expressions (for example
 * "argc + 1") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
127 #ifndef ELF_BASE_PLATFORM
128 /*
129  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
130  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
131  * will be copied to the user stack in the same manner as AT_PLATFORM.
132  */
133 #define ELF_BASE_PLATFORM NULL
134 #endif
135
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 *
 * Starting from bprm->p this:
 *   - copies the ELF_PLATFORM / ELF_BASE_PLATFORM strings (when non-NULL)
 *     and 16 random bytes onto the user stack;
 *   - builds the auxiliary vector in current->mm->saved_auxv;
 *   - computes the final, 16-byte-rounded stack pointer and stores it in
 *     bprm->p, extending the stack vma to cover it;
 *   - writes argc, the argv[] and envp[] pointer arrays (each terminated by
 *     a 0 entry) and finally the auxiliary vector to the user stack;
 *   - records arg_end, env_start and env_end in current->mm.
 *
 * @bprm:             exec parameters; argc, envc, p, exec, interp_flags and
 *                    interp_data are read, p (and exec on
 *                    CONFIG_STACK_GROWSUP) is written
 * @exec:             ELF header of the image; supplies e_phoff, e_phnum and
 *                    e_entry for the AT_PHDR / AT_PHNUM / AT_ENTRY entries
 * @load_addr:        added to e_phoff to form AT_PHDR
 * @interp_load_addr: value stored as AT_BASE
 *
 * Returns 0 on success, -EFAULT if a user-space copy or the stack extension
 * fails, -EINVAL if strnlen_user() returns 0 or more than MAX_ARG_STRLEN
 * for an argument or environment string.
 */
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
        /*
         * The vector is assembled in the kernel-side saved_auxv array first
         * and copied to the user stack at the very end of this function.
         */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /* Each entry is an (id, value) pair occupying two elf_info slots. */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
        /* Reserve stack room for the auxiliary vector itself. */
256         sp = STACK_ADD(p, ei_index);
257
        /*
         * Slots still needed: argv[] plus its 0 terminator, envp[] plus its
         * 0 terminator, and one slot for argc.
         */
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
        /* envp[] starts right after argv[] and its terminating 0 entry. */
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
        /*
         * p now walks the strings that start at mm->arg_start: the user
         * address of each one is stored in the next argv[]/envp[] slot and p
         * is advanced by the strnlen_user() result to reach the next string.
         */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
        /* The environment strings begin where the argument strings ended. */
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
        /* envp points at its 0 terminator here; the vector follows it. */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 #ifndef elf_map
319
320 static unsigned long elf_map(struct file *filep, unsigned long addr,
321                 struct elf_phdr *eppnt, int prot, int type,
322                 unsigned long total_size)
323 {
324         unsigned long map_addr;
325         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
326         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
327         addr = ELF_PAGESTART(addr);
328         size = ELF_PAGEALIGN(size);
329
330         /* mmap() will return -EINVAL if given a zero size, but a
331          * segment with zero filesize is perfectly valid */
332         if (!size)
333                 return addr;
334
335         down_write(&current->mm->mmap_sem);
336         /*
337         * total_size is the size of the ELF (interpreter) image.
338         * The _first_ mmap needs to know the full size, otherwise
339         * randomization might put this image into an overlapping
340         * position with the ELF binary image. (since size < total_size)
341         * So we first map the 'big' image - and unmap the remainder at
342         * the end. (which unmap is needed for ELF images with holes.)
343         */
344         if (total_size) {
345                 total_size = ELF_PAGEALIGN(total_size);
346                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
347                 if (!BAD_ADDR(map_addr))
348                         do_munmap(current->mm, map_addr+size, total_size-size);
349         } else
350                 map_addr = do_mmap(filep, addr, size, prot, type, off);
351
352         up_write(&current->mm->mmap_sem);
353         return(map_addr);
354 }
355
356 #endif /* !elf_map */
357
358 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
359 {
360         int i, first_idx = -1, last_idx = -1;
361
362         for (i = 0; i < nr; i++) {
363                 if (cmds[i].p_type == PT_LOAD) {
364                         last_idx = i;
365                         if (first_idx == -1)
366                                 first_idx = i;
367                 }
368         }
369         if (first_idx == -1)
370                 return 0;
371
372         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
373                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
374 }
375
376
377 /* This is much more generalized than the library routine read function,
378    so we keep this separate.  Technically the library read function
379    is only provided so that we can read a.out libraries that have
380    an ELF header */
381
/*
 * load_elf_interp() - map the PT_LOAD segments of an ELF interpreter.
 *
 * @interp_elf_ex:   ELF header of the interpreter
 * @interpreter:     open file the program headers and segments come from
 * @interp_map_addr: in/out; when it holds 0 it receives the address returned
 *                   by the first elf_map() call
 * @no_base:         when non-zero and the interpreter is ET_DYN, the first
 *                   segment is requested at address 0 (see below)
 *
 * The program header table is read into a temporary buffer, every PT_LOAD
 * entry is mapped with elf_map(), and any bss (memory size beyond file
 * size) is zero-padded and backed with do_brk().
 *
 * Returns the load address (0 unless the image is ET_DYN) on success.
 * On failure the value is an error encoded in an unsigned long: ~0UL when a
 * header consistency check fails, -EIO or the kernel_read() error for a
 * short/failed read, -EINVAL when there is no PT_LOAD entry, -ENOMEM when a
 * segment does not fit below TASK_SIZE, -EFAULT when padzero() fails, or
 * the BAD_ADDR value produced by elf_map()/do_brk().
 *
 * NOTE(review): a kmalloc() failure also returns ~0UL, not -ENOMEM, because
 * 'error' still holds its initial value at that point.
 */
382 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
383                 struct file *interpreter, unsigned long *interp_map_addr,
384                 unsigned long no_base)
385 {
386         struct elf_phdr *elf_phdata;
387         struct elf_phdr *eppnt;
388         unsigned long load_addr = 0;
389         int load_addr_set = 0;
390         unsigned long last_bss = 0, elf_bss = 0;
391         unsigned long error = ~0UL;
392         unsigned long total_size;
393         int retval, i, size;
394
395         /* First of all, some simple consistency checks */
396         if (interp_elf_ex->e_type != ET_EXEC &&
397             interp_elf_ex->e_type != ET_DYN)
398                 goto out;
399         if (!elf_check_arch(interp_elf_ex))
400                 goto out;
401         if (!interpreter->f_op || !interpreter->f_op->mmap)
402                 goto out;
403
404         /*
405          * If the size of this structure has changed, then punt, since
406          * we will be doing the wrong thing.
407          */
408         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
409                 goto out;
410         if (interp_elf_ex->e_phnum < 1 ||
411                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
412                 goto out;
413
414         /* Now read in all of the header information */
415         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        /* A header table larger than ELF_MIN_ALIGN bytes is rejected. */
416         if (size > ELF_MIN_ALIGN)
417                 goto out;
418         elf_phdata = kmalloc(size, GFP_KERNEL);
419         if (!elf_phdata)
420                 goto out;
421
422         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
423                              (char *)elf_phdata,size);
        /* Default for a short read; a negative retval overrides it below. */
424         error = -EIO;
425         if (retval != size) {
426                 if (retval < 0)
427                         error = retval; 
428                 goto out_close;
429         }
430
431         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
432         if (!total_size) {
433                 error = -EINVAL;
434                 goto out_close;
435         }
436
437         eppnt = elf_phdata;
438         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
439                 if (eppnt->p_type == PT_LOAD) {
440                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
441                         int elf_prot = 0;
442                         unsigned long vaddr = 0;
443                         unsigned long k, map_addr;
444
                        /* Translate the segment's PF_* flags into PROT_* bits. */
445                         if (eppnt->p_flags & PF_R)
446                                 elf_prot = PROT_READ;
447                         if (eppnt->p_flags & PF_W)
448                                 elf_prot |= PROT_WRITE;
449                         if (eppnt->p_flags & PF_X)
450                                 elf_prot |= PROT_EXEC;
451                         vaddr = eppnt->p_vaddr;
                        /*
                         * ET_EXEC images, and every segment after the load
                         * address is known, go at a fixed address.  With
                         * no_base set, -vaddr makes load_addr + vaddr == 0
                         * in the elf_map() call below.
                         */
452                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
453                                 elf_type |= MAP_FIXED;
454                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
455                                 load_addr = -vaddr;
456
457                         map_addr = elf_map(interpreter, load_addr + vaddr,
458                                         eppnt, elf_prot, elf_type, total_size);
                        /*
                         * Only the first elf_map() call is given the size of
                         * the whole image; later segments pass 0.
                         */
459                         total_size = 0;
460                         if (!*interp_map_addr)
461                                 *interp_map_addr = map_addr;
462                         error = map_addr;
463                         if (BAD_ADDR(map_addr))
464                                 goto out_close;
465
                        /*
                         * For ET_DYN the first successful mapping fixes the
                         * load address used for all remaining segments.
                         */
466                         if (!load_addr_set &&
467                             interp_elf_ex->e_type == ET_DYN) {
468                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
469                                 load_addr_set = 1;
470                         }
471
472                         /*
473                          * Check to see if the section's size will overflow the
474                          * allowed task size. Note that p_filesz must always be
475                          * <= p_memsize so it's only necessary to check p_memsz.
476                          */
477                         k = load_addr + eppnt->p_vaddr;
478                         if (BAD_ADDR(k) ||
479                             eppnt->p_filesz > eppnt->p_memsz ||
480                             eppnt->p_memsz > TASK_SIZE ||
481                             TASK_SIZE - eppnt->p_memsz < k) {
482                                 error = -ENOMEM;
483                                 goto out_close;
484                         }
485
486                         /*
487                          * Find the end of the file mapping for this phdr, and
488                          * keep track of the largest address we see for this.
489                          */
490                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
491                         if (k > elf_bss)
492                                 elf_bss = k;
493
494                         /*
495                          * Do the same thing for the memory mapping - between
496                          * elf_bss and last_bss is the bss section.
497                          */
498                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
499                         if (k > last_bss)
500                                 last_bss = k;
501                 }
502         }
503
504         if (last_bss > elf_bss) {
505                 /*
506                  * Now fill out the bss section.  First pad the last page up
507                  * to the page boundary, and then perform a mmap to make sure
508                  * that there are zero-mapped pages up to and including the
509                  * last bss page.
510                  */
511                 if (padzero(elf_bss)) {
512                         error = -EFAULT;
513                         goto out_close;
514                 }
515
516                 /* What we have mapped so far */
517                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
518
519                 /* Map the last of the bss segment */
520                 down_write(&current->mm->mmap_sem);
521                 error = do_brk(elf_bss, last_bss - elf_bss);
522                 up_write(&current->mm->mmap_sem);
523                 if (BAD_ADDR(error))
524                         goto out_close;
525         }
526
        /* Success: hand the load address back through the error variable. */
527         error = load_addr;
528
529 out_close:
530         kfree(elf_phdata);
531 out:
532         return error;
533 }
534
535 /*
536  * These are the functions used to load ELF style executables and shared
537  * libraries.  There is no binary dependent code anywhere else.
538  */
539
540 #define INTERPRETER_NONE 0
541 #define INTERPRETER_ELF 2
542
543 #ifndef STACK_RND_MASK
544 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
545 #endif
546
547 static unsigned long randomize_stack_top(unsigned long stack_top)
548 {
549         unsigned int random_variable = 0;
550
551         if ((current->flags & PF_RANDOMIZE) &&
552                 !(current->personality & ADDR_NO_RANDOMIZE)) {
553                 random_variable = get_random_int() & STACK_RND_MASK;
554                 random_variable <<= PAGE_SHIFT;
555         }
556 #ifdef CONFIG_STACK_GROWSUP
557         return PAGE_ALIGN(stack_top) + random_variable;
558 #else
559         return PAGE_ALIGN(stack_top) - random_variable;
560 #endif
561 }
562
563 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
564 {
565         struct file *interpreter = NULL; /* to shut gcc up */
566         unsigned long load_addr = 0, load_bias = 0;
567         int load_addr_set = 0;
568         char * elf_interpreter = NULL;
569         unsigned long error;
570         struct elf_phdr *elf_ppnt, *elf_phdata;
571         unsigned long elf_bss, elf_brk;
572         int retval, i;
573         unsigned int size;
574         unsigned long elf_entry;
575         unsigned long interp_load_addr = 0;
576         unsigned long start_code, end_code, start_data, end_data;
577         unsigned long reloc_func_desc = 0;
578         int executable_stack = EXSTACK_DEFAULT;
579         unsigned long def_flags = 0;
580         struct {
581                 struct elfhdr elf_ex;
582                 struct elfhdr interp_elf_ex;
583         } *loc;
584
585         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
586         if (!loc) {
587                 retval = -ENOMEM;
588                 goto out_ret;
589         }
590         
591         /* Get the exec-header */
592         loc->elf_ex = *((struct elfhdr *)bprm->buf);
593
594         retval = -ENOEXEC;
595         /* First of all, some simple consistency checks */
596         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
597                 goto out;
598
599         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
600                 goto out;
601         if (!elf_check_arch(&loc->elf_ex))
602                 goto out;
603         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
604                 goto out;
605
606         /* Now read in all of the header information */
607         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
608                 goto out;
609         if (loc->elf_ex.e_phnum < 1 ||
610                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
611                 goto out;
612         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
613         retval = -ENOMEM;
614         elf_phdata = kmalloc(size, GFP_KERNEL);
615         if (!elf_phdata)
616                 goto out;
617
618         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
619                              (char *)elf_phdata, size);
620         if (retval != size) {
621                 if (retval >= 0)
622                         retval = -EIO;
623                 goto out_free_ph;
624         }
625
626         elf_ppnt = elf_phdata;
627         elf_bss = 0;
628         elf_brk = 0;
629
630         start_code = ~0UL;
631         end_code = 0;
632         start_data = 0;
633         end_data = 0;
634
635         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
636                 if (elf_ppnt->p_type == PT_INTERP) {
637                         /* This is the program interpreter used for
638                          * shared libraries - for now assume that this
639                          * is an a.out format binary
640                          */
641                         retval = -ENOEXEC;
642                         if (elf_ppnt->p_filesz > PATH_MAX || 
643                             elf_ppnt->p_filesz < 2)
644                                 goto out_free_ph;
645
646                         retval = -ENOMEM;
647                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
648                                                   GFP_KERNEL);
649                         if (!elf_interpreter)
650                                 goto out_free_ph;
651
652                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
653                                              elf_interpreter,
654                                              elf_ppnt->p_filesz);
655                         if (retval != elf_ppnt->p_filesz) {
656                                 if (retval >= 0)
657                                         retval = -EIO;
658                                 goto out_free_interp;
659                         }
660                         /* make sure path is NULL terminated */
661                         retval = -ENOEXEC;
662                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663                                 goto out_free_interp;
664
665                         /*
666                          * The early SET_PERSONALITY here is so that the lookup
667                          * for the interpreter happens in the namespace of the 
668                          * to-be-execed image.  SET_PERSONALITY can select an
669                          * alternate root.
670                          *
671                          * However, SET_PERSONALITY is NOT allowed to switch
672                          * this task into the new images's memory mapping
673                          * policy - that is, TASK_SIZE must still evaluate to
674                          * that which is appropriate to the execing application.
675                          * This is because exit_mmap() needs to have TASK_SIZE
676                          * evaluate to the size of the old image.
677                          *
678                          * So if (say) a 64-bit application is execing a 32-bit
679                          * application it is the architecture's responsibility
680                          * to defer changing the value of TASK_SIZE until the
681                          * switch really is going to happen - do this in
682                          * flush_thread().      - akpm
683                          */
684                         SET_PERSONALITY(loc->elf_ex);
685
686                         interpreter = open_exec(elf_interpreter);
687                         retval = PTR_ERR(interpreter);
688                         if (IS_ERR(interpreter))
689                                 goto out_free_interp;
690
691                         /*
692                          * If the binary is not readable then enforce
693                          * mm->dumpable = 0 regardless of the interpreter's
694                          * permissions.
695                          */
696                         if (file_permission(interpreter, MAY_READ) < 0)
697                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
698
699                         retval = kernel_read(interpreter, 0, bprm->buf,
700                                              BINPRM_BUF_SIZE);
701                         if (retval != BINPRM_BUF_SIZE) {
702                                 if (retval >= 0)
703                                         retval = -EIO;
704                                 goto out_free_dentry;
705                         }
706
707                         /* Get the exec headers */
708                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
709                         break;
710                 }
711                 elf_ppnt++;
712         }
713
714         elf_ppnt = elf_phdata;
715         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
716                 if (elf_ppnt->p_type == PT_GNU_STACK) {
717                         if (elf_ppnt->p_flags & PF_X)
718                                 executable_stack = EXSTACK_ENABLE_X;
719                         else
720                                 executable_stack = EXSTACK_DISABLE_X;
721                         break;
722                 }
723
724         /* Some simple consistency checks for the interpreter */
725         if (elf_interpreter) {
726                 retval = -ELIBBAD;
727                 /* Not an ELF interpreter */
728                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
729                         goto out_free_dentry;
730                 /* Verify the interpreter has a valid arch */
731                 if (!elf_check_arch(&loc->interp_elf_ex))
732                         goto out_free_dentry;
733         } else {
734                 /* Executables without an interpreter also need a personality  */
735                 SET_PERSONALITY(loc->elf_ex);
736         }
737
738         /* Flush all traces of the currently running executable */
739         retval = flush_old_exec(bprm);
740         if (retval)
741                 goto out_free_dentry;
742
743         /* OK, This is the point of no return */
744         current->flags &= ~PF_FORKNOEXEC;
745         current->mm->def_flags = def_flags;
746
747         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
748            may depend on the personality.  */
749         SET_PERSONALITY(loc->elf_ex);
750         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
751                 current->personality |= READ_IMPLIES_EXEC;
752
753         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754                 current->flags |= PF_RANDOMIZE;
755         arch_pick_mmap_layout(current->mm);
756
757         /* Do this so that we can load the interpreter, if need be.  We will
758            change some of these later */
759         current->mm->free_area_cache = current->mm->mmap_base;
760         current->mm->cached_hole_size = 0;
761         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
762                                  executable_stack);
763         if (retval < 0) {
764                 send_sig(SIGKILL, current, 0);
765                 goto out_free_dentry;
766         }
767         
768         current->mm->start_stack = bprm->p;
769
770         /* Now we do a little grungy work by mmaping the ELF image into
771            the correct location in memory. */
772         for(i = 0, elf_ppnt = elf_phdata;
773             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
774                 int elf_prot = 0, elf_flags;
775                 unsigned long k, vaddr;
776
777                 if (elf_ppnt->p_type != PT_LOAD)
778                         continue;
779
780                 if (unlikely (elf_brk > elf_bss)) {
781                         unsigned long nbyte;
782                     
783                         /* There was a PT_LOAD segment with p_memsz > p_filesz
784                            before this one. Map anonymous pages, if needed,
785                            and clear the area.  */
786                         retval = set_brk (elf_bss + load_bias,
787                                           elf_brk + load_bias);
788                         if (retval) {
789                                 send_sig(SIGKILL, current, 0);
790                                 goto out_free_dentry;
791                         }
792                         nbyte = ELF_PAGEOFFSET(elf_bss);
793                         if (nbyte) {
794                                 nbyte = ELF_MIN_ALIGN - nbyte;
795                                 if (nbyte > elf_brk - elf_bss)
796                                         nbyte = elf_brk - elf_bss;
797                                 if (clear_user((void __user *)elf_bss +
798                                                         load_bias, nbyte)) {
799                                         /*
800                                          * This bss-zeroing can fail if the ELF
801                                          * file specifies odd protections. So
802                                          * we don't check the return value
803                                          */
804                                 }
805                         }
806                 }
807
808                 if (elf_ppnt->p_flags & PF_R)
809                         elf_prot |= PROT_READ;
810                 if (elf_ppnt->p_flags & PF_W)
811                         elf_prot |= PROT_WRITE;
812                 if (elf_ppnt->p_flags & PF_X)
813                         elf_prot |= PROT_EXEC;
814
815                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
816
817                 vaddr = elf_ppnt->p_vaddr;
818                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
819                         elf_flags |= MAP_FIXED;
820                 } else if (loc->elf_ex.e_type == ET_DYN) {
821                         /* Try and get dynamic programs out of the way of the
822                          * default mmap base, as well as whatever program they
823                          * might try to exec.  This is because the brk will
824                          * follow the loader, and is not movable.  */
825 #ifdef CONFIG_X86
826                         load_bias = 0;
827 #else
828                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
829 #endif
830                 }
831
832                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
833                                 elf_prot, elf_flags, 0);
834                 if (BAD_ADDR(error)) {
835                         send_sig(SIGKILL, current, 0);
836                         retval = IS_ERR((void *)error) ?
837                                 PTR_ERR((void*)error) : -EINVAL;
838                         goto out_free_dentry;
839                 }
840
841                 if (!load_addr_set) {
842                         load_addr_set = 1;
843                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
844                         if (loc->elf_ex.e_type == ET_DYN) {
845                                 load_bias += error -
846                                              ELF_PAGESTART(load_bias + vaddr);
847                                 load_addr += load_bias;
848                                 reloc_func_desc = load_bias;
849                         }
850                 }
851                 k = elf_ppnt->p_vaddr;
852                 if (k < start_code)
853                         start_code = k;
854                 if (start_data < k)
855                         start_data = k;
856
857                 /*
858                  * Check to see if the section's size will overflow the
859                  * allowed task size. Note that p_filesz must always be
860                  * <= p_memsz so it is only necessary to check p_memsz.
861                  */
862                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
863                     elf_ppnt->p_memsz > TASK_SIZE ||
864                     TASK_SIZE - elf_ppnt->p_memsz < k) {
865                         /* set_brk can never work. Avoid overflows. */
866                         send_sig(SIGKILL, current, 0);
867                         retval = -EINVAL;
868                         goto out_free_dentry;
869                 }
870
871                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
872
873                 if (k > elf_bss)
874                         elf_bss = k;
875                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
876                         end_code = k;
877                 if (end_data < k)
878                         end_data = k;
879                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
880                 if (k > elf_brk)
881                         elf_brk = k;
882         }
883
884         loc->elf_ex.e_entry += load_bias;
885         elf_bss += load_bias;
886         elf_brk += load_bias;
887         start_code += load_bias;
888         end_code += load_bias;
889         start_data += load_bias;
890         end_data += load_bias;
891
892         /* Calling set_brk effectively mmaps the pages that we need
893          * for the bss and break sections.  We must do this before
894          * mapping in the interpreter, to make sure it doesn't wind
895          * up getting placed where the bss needs to go.
896          */
897         retval = set_brk(elf_bss, elf_brk);
898         if (retval) {
899                 send_sig(SIGKILL, current, 0);
900                 goto out_free_dentry;
901         }
902         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
903                 send_sig(SIGSEGV, current, 0);
904                 retval = -EFAULT; /* Nobody gets to see this, but.. */
905                 goto out_free_dentry;
906         }
907
908         if (elf_interpreter) {
909                 unsigned long uninitialized_var(interp_map_addr);
910
911                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
912                                             interpreter,
913                                             &interp_map_addr,
914                                             load_bias);
915                 if (!IS_ERR((void *)elf_entry)) {
916                         /*
917                          * load_elf_interp() returns relocation
918                          * adjustment
919                          */
920                         interp_load_addr = elf_entry;
921                         elf_entry += loc->interp_elf_ex.e_entry;
922                 }
923                 if (BAD_ADDR(elf_entry)) {
924                         force_sig(SIGSEGV, current);
925                         retval = IS_ERR((void *)elf_entry) ?
926                                         (int)elf_entry : -EINVAL;
927                         goto out_free_dentry;
928                 }
929                 reloc_func_desc = interp_load_addr;
930
931                 allow_write_access(interpreter);
932                 fput(interpreter);
933                 kfree(elf_interpreter);
934         } else {
935                 elf_entry = loc->elf_ex.e_entry;
936                 if (BAD_ADDR(elf_entry)) {
937                         force_sig(SIGSEGV, current);
938                         retval = -EINVAL;
939                         goto out_free_dentry;
940                 }
941         }
942
943         kfree(elf_phdata);
944
945         set_binfmt(&elf_format);
946
947 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
948         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
949         if (retval < 0) {
950                 send_sig(SIGKILL, current, 0);
951                 goto out;
952         }
953 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
954
955         install_exec_creds(bprm);
956         current->flags &= ~PF_FORKNOEXEC;
957         retval = create_elf_tables(bprm, &loc->elf_ex,
958                           load_addr, interp_load_addr);
959         if (retval < 0) {
960                 send_sig(SIGKILL, current, 0);
961                 goto out;
962         }
963         /* N.B. passed_fileno might not be initialized? */
964         current->mm->end_code = end_code;
965         current->mm->start_code = start_code;
966         current->mm->start_data = start_data;
967         current->mm->end_data = end_data;
968         current->mm->start_stack = bprm->p;
969
970 #ifdef arch_randomize_brk
971         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
972                 current->mm->brk = current->mm->start_brk =
973                         arch_randomize_brk(current->mm);
974 #endif
975
976         if (current->personality & MMAP_PAGE_ZERO) {
977                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
978                    and some applications "depend" upon this behavior.
979                    Since we do not have the power to recompile these, we
980                    emulate the SVr4 behavior. Sigh. */
981                 down_write(&current->mm->mmap_sem);
982                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
983                                 MAP_FIXED | MAP_PRIVATE, 0);
984                 up_write(&current->mm->mmap_sem);
985         }
986
987 #ifdef ELF_PLAT_INIT
988         /*
989          * The ABI may specify that certain registers be set up in special
990          * ways (on i386 %edx is the address of a DT_FINI function, for
991          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
992          * that the e_entry field is the address of the function descriptor
993          * for the startup routine, rather than the address of the startup
994          * routine itself.  This macro performs whatever initialization to
995          * the regs structure is required as well as any relocations to the
996          * function descriptor entries when executing dynamically links apps.
997          */
998         ELF_PLAT_INIT(regs, reloc_func_desc);
999 #endif
1000
1001         start_thread(regs, elf_entry, bprm->p);
1002         retval = 0;
1003 out:
1004         kfree(loc);
1005 out_ret:
1006         return retval;
1007
1008         /* error cleanup */
1009 out_free_dentry:
1010         allow_write_access(interpreter);
1011         if (interpreter)
1012                 fput(interpreter);
1013 out_free_interp:
1014         kfree(elf_interpreter);
1015 out_free_ph:
1016         kfree(elf_phdata);
1017         goto out;
1018 }
1019
1020 /* This is really simpleminded and specialized - we are loading an
1021    a.out library that is given an ELF header. */
1022 static int load_elf_library(struct file *file)
1023 {
1024         struct elf_phdr *elf_phdata;
1025         struct elf_phdr *eppnt;
1026         unsigned long elf_bss, bss, len;
1027         int retval, error, i, j;
1028         struct elfhdr elf_ex;
1029
1030         error = -ENOEXEC;
1031         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1032         if (retval != sizeof(elf_ex))
1033                 goto out;
1034
1035         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1036                 goto out;
1037
1038         /* First of all, some simple consistency checks */
1039         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1040             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1041                 goto out;
1042
1043         /* Now read in all of the header information */
1044
1045         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1046         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1047
1048         error = -ENOMEM;
1049         elf_phdata = kmalloc(j, GFP_KERNEL);
1050         if (!elf_phdata)
1051                 goto out;
1052
1053         eppnt = elf_phdata;
1054         error = -ENOEXEC;
1055         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1056         if (retval != j)
1057                 goto out_free_ph;
1058
1059         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1060                 if ((eppnt + i)->p_type == PT_LOAD)
1061                         j++;
1062         if (j != 1)
1063                 goto out_free_ph;
1064
1065         while (eppnt->p_type != PT_LOAD)
1066                 eppnt++;
1067
1068         /* Now use mmap to map the library into memory. */
1069         down_write(&current->mm->mmap_sem);
1070         error = do_mmap(file,
1071                         ELF_PAGESTART(eppnt->p_vaddr),
1072                         (eppnt->p_filesz +
1073                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                         PROT_READ | PROT_WRITE | PROT_EXEC,
1075                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                         (eppnt->p_offset -
1077                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078         up_write(&current->mm->mmap_sem);
1079         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1080                 goto out_free_ph;
1081
1082         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1083         if (padzero(elf_bss)) {
1084                 error = -EFAULT;
1085                 goto out_free_ph;
1086         }
1087
1088         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1089                             ELF_MIN_ALIGN - 1);
1090         bss = eppnt->p_memsz + eppnt->p_vaddr;
1091         if (bss > len) {
1092                 down_write(&current->mm->mmap_sem);
1093                 do_brk(len, bss - len);
1094                 up_write(&current->mm->mmap_sem);
1095         }
1096         error = 0;
1097
1098 out_free_ph:
1099         kfree(elf_phdata);
1100 out:
1101         return error;
1102 }
1103
1104 /*
1105  * Note that some platforms still use traditional core dumps and not
1106  * the ELF core dump.  Each platform can select it as appropriate.
1107  */
1108 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110 /*
1111  * ELF core dumper
1112  *
1113  * Modelled on fs/exec.c:aout_core_dump()
1114  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1115  */
1116 /*
1117  * These are the only things you should do on a core-file: use only these
1118  * functions to write out all the necessary info.
1119  */
1120 static int dump_write(struct file *file, const void *addr, int nr)
1121 {
1122         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1123 }
1124
1125 static int dump_seek(struct file *file, loff_t off)
1126 {
1127         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1128                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1129                         return 0;
1130         } else {
1131                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1132                 if (!buf)
1133                         return 0;
1134                 while (off > 0) {
1135                         unsigned long n = off;
1136                         if (n > PAGE_SIZE)
1137                                 n = PAGE_SIZE;
1138                         if (!dump_write(file, buf, n))
1139                                 return 0;
1140                         off -= n;
1141                 }
1142                 free_page((unsigned long)buf);
1143         }
1144         return 1;
1145 }
1146
1147 /*
1148  * Decide what to dump of a segment, part, all or none.
1149  */
1150 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1151                                    unsigned long mm_flags)
1152 {
1153 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1154
1155         /* The vma can be set up to tell us the answer directly.  */
1156         if (vma->vm_flags & VM_ALWAYSDUMP)
1157                 goto whole;
1158
1159         /* Hugetlb memory check */
1160         if (vma->vm_flags & VM_HUGETLB) {
1161                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1162                         goto whole;
1163                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1164                         goto whole;
1165         }
1166
1167         /* Do not dump I/O mapped devices or special mappings */
1168         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1169                 return 0;
1170
1171         /* By default, dump shared memory if mapped from an anonymous file. */
1172         if (vma->vm_flags & VM_SHARED) {
1173                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1174                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1175                         goto whole;
1176                 return 0;
1177         }
1178
1179         /* Dump segments that have been written to.  */
1180         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1181                 goto whole;
1182         if (vma->vm_file == NULL)
1183                 return 0;
1184
1185         if (FILTER(MAPPED_PRIVATE))
1186                 goto whole;
1187
1188         /*
1189          * If this looks like the beginning of a DSO or executable mapping,
1190          * check for an ELF header.  If we find one, dump the first page to
1191          * aid in determining what was mapped here.
1192          */
1193         if (FILTER(ELF_HEADERS) &&
1194             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1195                 u32 __user *header = (u32 __user *) vma->vm_start;
1196                 u32 word;
1197                 mm_segment_t fs = get_fs();
1198                 /*
1199                  * Doing it this way gets the constant folded by GCC.
1200                  */
1201                 union {
1202                         u32 cmp;
1203                         char elfmag[SELFMAG];
1204                 } magic;
1205                 BUILD_BUG_ON(SELFMAG != sizeof word);
1206                 magic.elfmag[EI_MAG0] = ELFMAG0;
1207                 magic.elfmag[EI_MAG1] = ELFMAG1;
1208                 magic.elfmag[EI_MAG2] = ELFMAG2;
1209                 magic.elfmag[EI_MAG3] = ELFMAG3;
1210                 /*
1211                  * Switch to the user "segment" for get_user(),
1212                  * then put back what elf_core_dump() had in place.
1213                  */
1214                 set_fs(USER_DS);
1215                 if (unlikely(get_user(word, header)))
1216                         word = 0;
1217                 set_fs(fs);
1218                 if (word == magic.cmp)
1219                         return PAGE_SIZE;
1220         }
1221
1222 #undef  FILTER
1223
1224         return 0;
1225
1226 whole:
1227         return vma->vm_end - vma->vm_start;
1228 }
1229
/* An ELF note held in memory until it is written to the core file. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type (NT_*) */
	unsigned int datasz;	/* size in bytes of the buffer at data */
	void *data;		/* note payload */
};
1238
1239 static int notesize(struct memelfnote *en)
1240 {
1241         int sz;
1242
1243         sz = sizeof(struct elf_note);
1244         sz += roundup(strlen(en->name) + 1, 4);
1245         sz += roundup(en->datasz, 4);
1246
1247         return sz;
1248 }
1249
1250 #define DUMP_WRITE(addr, nr, foffset)   \
1251         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1252
1253 static int alignfile(struct file *file, loff_t *foffset)
1254 {
1255         static const char buf[4] = { 0, };
1256         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1257         return 1;
1258 }
1259
1260 static int writenote(struct memelfnote *men, struct file *file,
1261                         loff_t *foffset)
1262 {
1263         struct elf_note en;
1264         en.n_namesz = strlen(men->name) + 1;
1265         en.n_descsz = men->datasz;
1266         en.n_type = men->type;
1267
1268         DUMP_WRITE(&en, sizeof(en), foffset);
1269         DUMP_WRITE(men->name, en.n_namesz, foffset);
1270         if (!alignfile(file, foffset))
1271                 return 0;
1272         DUMP_WRITE(men->data, men->datasz, foffset);
1273         if (!alignfile(file, foffset))
1274                 return 0;
1275
1276         return 1;
1277 }
1278 #undef DUMP_WRITE
1279
1280 #define DUMP_WRITE(addr, nr)    \
1281         if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1282                 goto end_coredump;
1283
1284 static void fill_elf_header(struct elfhdr *elf, int segs,
1285                             u16 machine, u32 flags, u8 osabi)
1286 {
1287         memset(elf, 0, sizeof(*elf));
1288
1289         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1290         elf->e_ident[EI_CLASS] = ELF_CLASS;
1291         elf->e_ident[EI_DATA] = ELF_DATA;
1292         elf->e_ident[EI_VERSION] = EV_CURRENT;
1293         elf->e_ident[EI_OSABI] = ELF_OSABI;
1294
1295         elf->e_type = ET_CORE;
1296         elf->e_machine = machine;
1297         elf->e_version = EV_CURRENT;
1298         elf->e_phoff = sizeof(struct elfhdr);
1299         elf->e_flags = flags;
1300         elf->e_ehsize = sizeof(struct elfhdr);
1301         elf->e_phentsize = sizeof(struct elf_phdr);
1302         elf->e_phnum = segs;
1303
1304         return;
1305 }
1306
1307 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1308 {
1309         phdr->p_type = PT_NOTE;
1310         phdr->p_offset = offset;
1311         phdr->p_vaddr = 0;
1312         phdr->p_paddr = 0;
1313         phdr->p_filesz = sz;
1314         phdr->p_memsz = 0;
1315         phdr->p_flags = 0;
1316         phdr->p_align = 0;
1317         return;
1318 }
1319
1320 static void fill_note(struct memelfnote *note, const char *name, int type, 
1321                 unsigned int sz, void *data)
1322 {
1323         note->name = name;
1324         note->type = type;
1325         note->datasz = sz;
1326         note->data = data;
1327         return;
1328 }
1329
1330 /*
1331  * fill up all the fields in prstatus from the given task struct, except
1332  * registers which need to be filled up separately.
1333  */
1334 static void fill_prstatus(struct elf_prstatus *prstatus,
1335                 struct task_struct *p, long signr)
1336 {
1337         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1338         prstatus->pr_sigpend = p->pending.signal.sig[0];
1339         prstatus->pr_sighold = p->blocked.sig[0];
1340         rcu_read_lock();
1341         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1342         rcu_read_unlock();
1343         prstatus->pr_pid = task_pid_vnr(p);
1344         prstatus->pr_pgrp = task_pgrp_vnr(p);
1345         prstatus->pr_sid = task_session_vnr(p);
1346         if (thread_group_leader(p)) {
1347                 struct task_cputime cputime;
1348
1349                 /*
1350                  * This is the record for the group leader.  It shows the
1351                  * group-wide total, not its individual thread total.
1352                  */
1353                 thread_group_cputime(p, &cputime);
1354                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1355                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1356         } else {
1357                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1358                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1359         }
1360         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1361         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1362 }
1363
1364 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1365                        struct mm_struct *mm)
1366 {
1367         const struct cred *cred;
1368         unsigned int i, len;
1369         
1370         /* first copy the parameters from user space */
1371         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1372
1373         len = mm->arg_end - mm->arg_start;
1374         if (len >= ELF_PRARGSZ)
1375                 len = ELF_PRARGSZ-1;
1376         if (copy_from_user(&psinfo->pr_psargs,
1377                            (const char __user *)mm->arg_start, len))
1378                 return -EFAULT;
1379         for(i = 0; i < len; i++)
1380                 if (psinfo->pr_psargs[i] == 0)
1381                         psinfo->pr_psargs[i] = ' ';
1382         psinfo->pr_psargs[len] = 0;
1383
1384         rcu_read_lock();
1385         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1386         rcu_read_unlock();
1387         psinfo->pr_pid = task_pid_vnr(p);
1388         psinfo->pr_pgrp = task_pgrp_vnr(p);
1389         psinfo->pr_sid = task_session_vnr(p);
1390
1391         i = p->state ? ffz(~p->state) + 1 : 0;
1392         psinfo->pr_state = i;
1393         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1394         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1395         psinfo->pr_nice = task_nice(p);
1396         psinfo->pr_flag = p->flags;
1397         rcu_read_lock();
1398         cred = __task_cred(p);
1399         SET_UID(psinfo->pr_uid, cred->uid);
1400         SET_GID(psinfo->pr_gid, cred->gid);
1401         rcu_read_unlock();
1402         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1403         
1404         return 0;
1405 }
1406
1407 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1408 {
1409         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1410         int i = 0;
1411         do
1412                 i += 2;
1413         while (auxv[i - 2] != AT_NULL);
1414         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1415 }
1416
1417 #ifdef CORE_DUMP_USE_REGSET
1418 #include <linux/regset.h>
1419
1420 struct elf_thread_core_info {
1421         struct elf_thread_core_info *next;
1422         struct task_struct *task;
1423         struct elf_prstatus prstatus;
1424         struct memelfnote notes[0];
1425 };
1426
1427 struct elf_note_info {
1428         struct elf_thread_core_info *thread;
1429         struct memelfnote psinfo;
1430         struct memelfnote auxv;
1431         size_t size;
1432         int thread_notes;
1433 };
1434
1435 /*
1436  * When a regset has a writeback hook, we call it on each thread before
1437  * dumping user memory.  On register window machines, this makes sure the
1438  * user memory backing the register data is up to date before we read it.
1439  */
1440 static void do_thread_regset_writeback(struct task_struct *task,
1441                                        const struct user_regset *regset)
1442 {
1443         if (regset->writeback)
1444                 regset->writeback(task, regset, 1);
1445 }
1446
1447 static int fill_thread_core_info(struct elf_thread_core_info *t,
1448                                  const struct user_regset_view *view,
1449                                  long signr, size_t *total)
1450 {
1451         unsigned int i;
1452
1453         /*
1454          * NT_PRSTATUS is the one special case, because the regset data
1455          * goes into the pr_reg field inside the note contents, rather
1456          * than being the whole note contents.  We fill the reset in here.
1457          * We assume that regset 0 is NT_PRSTATUS.
1458          */
1459         fill_prstatus(&t->prstatus, t->task, signr);
1460         (void) view->regsets[0].get(t->task, &view->regsets[0],
1461                                     0, sizeof(t->prstatus.pr_reg),
1462                                     &t->prstatus.pr_reg, NULL);
1463
1464         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1465                   sizeof(t->prstatus), &t->prstatus);
1466         *total += notesize(&t->notes[0]);
1467
1468         do_thread_regset_writeback(t->task, &view->regsets[0]);
1469
1470         /*
1471          * Each other regset might generate a note too.  For each regset
1472          * that has no core_note_type or is inactive, we leave t->notes[i]
1473          * all zero and we'll know to skip writing it later.
1474          */
1475         for (i = 1; i < view->n; ++i) {
1476                 const struct user_regset *regset = &view->regsets[i];
1477                 do_thread_regset_writeback(t->task, regset);
1478                 if (regset->core_note_type &&
1479                     (!regset->active || regset->active(t->task, regset))) {
1480                         int ret;
1481                         size_t size = regset->n * regset->size;
1482                         void *data = kmalloc(size, GFP_KERNEL);
1483                         if (unlikely(!data))
1484                                 return 0;
1485                         ret = regset->get(t->task, regset,
1486                                           0, size, data, NULL);
1487                         if (unlikely(ret))
1488                                 kfree(data);
1489                         else {
1490                                 if (regset->core_note_type != NT_PRFPREG)
1491                                         fill_note(&t->notes[i], "LINUX",
1492                                                   regset->core_note_type,
1493                                                   size, data);
1494                                 else {
1495                                         t->prstatus.pr_fpvalid = 1;
1496                                         fill_note(&t->notes[i], "CORE",
1497                                                   NT_PRFPREG, size, data);
1498                                 }
1499                                 *total += notesize(&t->notes[i]);
1500                         }
1501                 }
1502         }
1503
1504         return 1;
1505 }
1506
1507 static int fill_note_info(struct elfhdr *elf, int phdrs,
1508                           struct elf_note_info *info,
1509                           long signr, struct pt_regs *regs)
1510 {
1511         struct task_struct *dump_task = current;
1512         const struct user_regset_view *view = task_user_regset_view(dump_task);
1513         struct elf_thread_core_info *t;
1514         struct elf_prpsinfo *psinfo;
1515         struct core_thread *ct;
1516         unsigned int i;
1517
1518         info->size = 0;
1519         info->thread = NULL;
1520
1521         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1522         if (psinfo == NULL)
1523                 return 0;
1524
1525         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1526
1527         /*
1528          * Figure out how many notes we're going to need for each thread.
1529          */
1530         info->thread_notes = 0;
1531         for (i = 0; i < view->n; ++i)
1532                 if (view->regsets[i].core_note_type != 0)
1533                         ++info->thread_notes;
1534
1535         /*
1536          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1537          * since it is our one special case.
1538          */
1539         if (unlikely(info->thread_notes == 0) ||
1540             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1541                 WARN_ON(1);
1542                 return 0;
1543         }
1544
1545         /*
1546          * Initialize the ELF file header.
1547          */
1548         fill_elf_header(elf, phdrs,
1549                         view->e_machine, view->e_flags, view->ei_osabi);
1550
1551         /*
1552          * Allocate a structure for each thread.
1553          */
1554         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1555                 t = kzalloc(offsetof(struct elf_thread_core_info,
1556                                      notes[info->thread_notes]),
1557                             GFP_KERNEL);
1558                 if (unlikely(!t))
1559                         return 0;
1560
1561                 t->task = ct->task;
1562                 if (ct->task == dump_task || !info->thread) {
1563                         t->next = info->thread;
1564                         info->thread = t;
1565                 } else {
1566                         /*
1567                          * Make sure to keep the original task at
1568                          * the head of the list.
1569                          */
1570                         t->next = info->thread->next;
1571                         info->thread->next = t;
1572                 }
1573         }
1574
1575         /*
1576          * Now fill in each thread's information.
1577          */
1578         for (t = info->thread; t != NULL; t = t->next)
1579                 if (!fill_thread_core_info(t, view, signr, &info->size))
1580                         return 0;
1581
1582         /*
1583          * Fill in the two process-wide notes.
1584          */
1585         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1586         info->size += notesize(&info->psinfo);
1587
1588         fill_auxv_note(&info->auxv, current->mm);
1589         info->size += notesize(&info->auxv);
1590
1591         return 1;
1592 }
1593
1594 static size_t get_note_info_size(struct elf_note_info *info)
1595 {
1596         return info->size;
1597 }
1598
1599 /*
1600  * Write all the notes for each thread.  When writing the first thread, the
1601  * process-wide notes are interleaved after the first thread-specific note.
1602  */
1603 static int write_note_info(struct elf_note_info *info,
1604                            struct file *file, loff_t *foffset)
1605 {
1606         bool first = 1;
1607         struct elf_thread_core_info *t = info->thread;
1608
1609         do {
1610                 int i;
1611
1612                 if (!writenote(&t->notes[0], file, foffset))
1613                         return 0;
1614
1615                 if (first && !writenote(&info->psinfo, file, foffset))
1616                         return 0;
1617                 if (first && !writenote(&info->auxv, file, foffset))
1618                         return 0;
1619
1620                 for (i = 1; i < info->thread_notes; ++i)
1621                         if (t->notes[i].data &&
1622                             !writenote(&t->notes[i], file, foffset))
1623                                 return 0;
1624
1625                 first = 0;
1626                 t = t->next;
1627         } while (t);
1628
1629         return 1;
1630 }
1631
1632 static void free_note_info(struct elf_note_info *info)
1633 {
1634         struct elf_thread_core_info *threads = info->thread;
1635         while (threads) {
1636                 unsigned int i;
1637                 struct elf_thread_core_info *t = threads;
1638                 threads = t->next;
1639                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1640                 for (i = 1; i < info->thread_notes; ++i)
1641                         kfree(t->notes[i].data);
1642                 kfree(t);
1643         }
1644         kfree(info->psinfo.data);
1645 }
1646
1647 #else
1648
1649 /* Here is the structure in which status of each thread is captured. */
1650 struct elf_thread_status
1651 {
1652         struct list_head list;
1653         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1654         elf_fpregset_t fpu;             /* NT_PRFPREG */
1655         struct task_struct *thread;
1656 #ifdef ELF_CORE_COPY_XFPREGS
1657         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1658 #endif
1659         struct memelfnote notes[3];
1660         int num_notes;
1661 };
1662
1663 /*
1664  * In order to add the specific thread information for the elf file format,
1665  * we need to keep a linked list of every threads pr_status and then create
1666  * a single section for them in the final core file.
1667  */
1668 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1669 {
1670         int sz = 0;
1671         struct task_struct *p = t->thread;
1672         t->num_notes = 0;
1673
1674         fill_prstatus(&t->prstatus, p, signr);
1675         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1676         
1677         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1678                   &(t->prstatus));
1679         t->num_notes++;
1680         sz += notesize(&t->notes[0]);
1681
1682         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1683                                                                 &t->fpu))) {
1684                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1685                           &(t->fpu));
1686                 t->num_notes++;
1687                 sz += notesize(&t->notes[1]);
1688         }
1689
1690 #ifdef ELF_CORE_COPY_XFPREGS
1691         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1692                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1693                           sizeof(t->xfpu), &t->xfpu);
1694                 t->num_notes++;
1695                 sz += notesize(&t->notes[2]);
1696         }
1697 #endif  
1698         return sz;
1699 }
1700
1701 struct elf_note_info {
1702         struct memelfnote *notes;
1703         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1704         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1705         struct list_head thread_list;
1706         elf_fpregset_t *fpu;
1707 #ifdef ELF_CORE_COPY_XFPREGS
1708         elf_fpxregset_t *xfpu;
1709 #endif
1710         int thread_status_size;
1711         int numnote;
1712 };
1713
1714 static int elf_note_info_init(struct elf_note_info *info)
1715 {
1716         memset(info, 0, sizeof(*info));
1717         INIT_LIST_HEAD(&info->thread_list);
1718
1719         /* Allocate space for six ELF notes */
1720         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1721         if (!info->notes)
1722                 return 0;
1723         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1724         if (!info->psinfo)
1725                 goto notes_free;
1726         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1727         if (!info->prstatus)
1728                 goto psinfo_free;
1729         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1730         if (!info->fpu)
1731                 goto prstatus_free;
1732 #ifdef ELF_CORE_COPY_XFPREGS
1733         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1734         if (!info->xfpu)
1735                 goto fpu_free;
1736 #endif
1737         return 1;
1738 #ifdef ELF_CORE_COPY_XFPREGS
1739  fpu_free:
1740         kfree(info->fpu);
1741 #endif
1742  prstatus_free:
1743         kfree(info->prstatus);
1744  psinfo_free:
1745         kfree(info->psinfo);
1746  notes_free:
1747         kfree(info->notes);
1748         return 0;
1749 }
1750
1751 static int fill_note_info(struct elfhdr *elf, int phdrs,
1752                           struct elf_note_info *info,
1753                           long signr, struct pt_regs *regs)
1754 {
1755         struct list_head *t;
1756
1757         if (!elf_note_info_init(info))
1758                 return 0;
1759
1760         if (signr) {
1761                 struct core_thread *ct;
1762                 struct elf_thread_status *ets;
1763
1764                 for (ct = current->mm->core_state->dumper.next;
1765                                                 ct; ct = ct->next) {
1766                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1767                         if (!ets)
1768                                 return 0;
1769
1770                         ets->thread = ct->task;
1771                         list_add(&ets->list, &info->thread_list);
1772                 }
1773
1774                 list_for_each(t, &info->thread_list) {
1775                         int sz;
1776
1777                         ets = list_entry(t, struct elf_thread_status, list);
1778                         sz = elf_dump_thread_status(signr, ets);
1779                         info->thread_status_size += sz;
1780                 }
1781         }
1782         /* now collect the dump for the current */
1783         memset(info->prstatus, 0, sizeof(*info->prstatus));
1784         fill_prstatus(info->prstatus, current, signr);
1785         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1786
1787         /* Set up header */
1788         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1789
1790         /*
1791          * Set up the notes in similar form to SVR4 core dumps made
1792          * with info from their /proc.
1793          */
1794
1795         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1796                   sizeof(*info->prstatus), info->prstatus);
1797         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1798         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1799                   sizeof(*info->psinfo), info->psinfo);
1800
1801         info->numnote = 2;
1802
1803         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1804
1805         /* Try to dump the FPU. */
1806         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1807                                                                info->fpu);
1808         if (info->prstatus->pr_fpvalid)
1809                 fill_note(info->notes + info->numnote++,
1810                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1811 #ifdef ELF_CORE_COPY_XFPREGS
1812         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1813                 fill_note(info->notes + info->numnote++,
1814                           "LINUX", ELF_CORE_XFPREG_TYPE,
1815                           sizeof(*info->xfpu), info->xfpu);
1816 #endif
1817
1818         return 1;
1819 }
1820
1821 static size_t get_note_info_size(struct elf_note_info *info)
1822 {
1823         int sz = 0;
1824         int i;
1825
1826         for (i = 0; i < info->numnote; i++)
1827                 sz += notesize(info->notes + i);
1828
1829         sz += info->thread_status_size;
1830
1831         return sz;
1832 }
1833
1834 static int write_note_info(struct elf_note_info *info,
1835                            struct file *file, loff_t *foffset)
1836 {
1837         int i;
1838         struct list_head *t;
1839
1840         for (i = 0; i < info->numnote; i++)
1841                 if (!writenote(info->notes + i, file, foffset))
1842                         return 0;
1843
1844         /* write out the thread status notes section */
1845         list_for_each(t, &info->thread_list) {
1846                 struct elf_thread_status *tmp =
1847                                 list_entry(t, struct elf_thread_status, list);
1848
1849                 for (i = 0; i < tmp->num_notes; i++)
1850                         if (!writenote(&tmp->notes[i], file, foffset))
1851                                 return 0;
1852         }
1853
1854         return 1;
1855 }
1856
1857 static void free_note_info(struct elf_note_info *info)
1858 {
1859         while (!list_empty(&info->thread_list)) {
1860                 struct list_head *tmp = info->thread_list.next;
1861                 list_del(tmp);
1862                 kfree(list_entry(tmp, struct elf_thread_status, list));
1863         }
1864
1865         kfree(info->prstatus);
1866         kfree(info->psinfo);
1867         kfree(info->notes);
1868         kfree(info->fpu);
1869 #ifdef ELF_CORE_COPY_XFPREGS
1870         kfree(info->xfpu);
1871 #endif
1872 }
1873
1874 #endif
1875
1876 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1877                                         struct vm_area_struct *gate_vma)
1878 {
1879         struct vm_area_struct *ret = tsk->mm->mmap;
1880
1881         if (ret)
1882                 return ret;
1883         return gate_vma;
1884 }
1885 /*
1886  * Helper function for iterating across a vma list.  It ensures that the caller
1887  * will visit `gate_vma' prior to terminating the search.
1888  */
1889 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1890                                         struct vm_area_struct *gate_vma)
1891 {
1892         struct vm_area_struct *ret;
1893
1894         ret = this_vma->vm_next;
1895         if (ret)
1896                 return ret;
1897         if (this_vma == gate_vma)
1898                 return NULL;
1899         return gate_vma;
1900 }
1901
1902 /*
1903  * Actual dumper
1904  *
1905  * This is a two-pass process; first we find the offsets of the bits,
1906  * and then they are actually written out.  If we run out of core limit
1907  * we just truncate.
1908  */
1909 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1910 {
1911         int has_dumped = 0;
1912         mm_segment_t fs;
1913         int segs;
1914         size_t size = 0;
1915         struct vm_area_struct *vma, *gate_vma;
1916         struct elfhdr *elf = NULL;
1917         loff_t offset = 0, dataoff, foffset;
1918         unsigned long mm_flags;
1919         struct elf_note_info info;
1920
1921         /*
1922          * We no longer stop all VM operations.
1923          * 
1924          * This is because those proceses that could possibly change map_count
1925          * or the mmap / vma pages are now blocked in do_exit on current
1926          * finishing this core dump.
1927          *
1928          * Only ptrace can touch these memory addresses, but it doesn't change
1929          * the map_count or the pages allocated. So no possibility of crashing
1930          * exists while dumping the mm->vm_next areas to the core file.
1931          */
1932   
1933         /* alloc memory for large data structures: too large to be on stack */
1934         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1935         if (!elf)
1936                 goto out;
1937         /*
1938          * The number of segs are recored into ELF header as 16bit value.
1939          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1940          */
1941         segs = current->mm->map_count;
1942 #ifdef ELF_CORE_EXTRA_PHDRS
1943         segs += ELF_CORE_EXTRA_PHDRS;
1944 #endif
1945
1946         gate_vma = get_gate_vma(current);
1947         if (gate_vma != NULL)
1948                 segs++;
1949
1950         /*
1951          * Collect all the non-memory information about the process for the
1952          * notes.  This also sets up the file header.
1953          */
1954         if (!fill_note_info(elf, segs + 1, /* including notes section */
1955                             &info, signr, regs))
1956                 goto cleanup;
1957
1958         has_dumped = 1;
1959         current->flags |= PF_DUMPCORE;
1960   
1961         fs = get_fs();
1962         set_fs(KERNEL_DS);
1963
1964         DUMP_WRITE(elf, sizeof(*elf));
1965         offset += sizeof(*elf);                         /* Elf header */
1966         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1967         foffset = offset;
1968
1969         /* Write notes phdr entry */
1970         {
1971                 struct elf_phdr phdr;
1972                 size_t sz = get_note_info_size(&info);
1973
1974                 sz += elf_coredump_extra_notes_size();
1975
1976                 fill_elf_note_phdr(&phdr, sz, offset);
1977                 offset += sz;
1978                 DUMP_WRITE(&phdr, sizeof(phdr));
1979         }
1980
1981         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1982
1983         /*
1984          * We must use the same mm->flags while dumping core to avoid
1985          * inconsistency between the program headers and bodies, otherwise an
1986          * unusable core file can be generated.
1987          */
1988         mm_flags = current->mm->flags;
1989
1990         /* Write program headers for segments dump */
1991         for (vma = first_vma(current, gate_vma); vma != NULL;
1992                         vma = next_vma(vma, gate_vma)) {
1993                 struct elf_phdr phdr;
1994
1995                 phdr.p_type = PT_LOAD;
1996                 phdr.p_offset = offset;
1997                 phdr.p_vaddr = vma->vm_start;
1998                 phdr.p_paddr = 0;
1999                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
2000                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2001                 offset += phdr.p_filesz;
2002                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2003                 if (vma->vm_flags & VM_WRITE)
2004                         phdr.p_flags |= PF_W;
2005                 if (vma->vm_flags & VM_EXEC)
2006                         phdr.p_flags |= PF_X;
2007                 phdr.p_align = ELF_EXEC_PAGESIZE;
2008
2009                 DUMP_WRITE(&phdr, sizeof(phdr));
2010         }
2011
2012 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2013         ELF_CORE_WRITE_EXTRA_PHDRS;
2014 #endif
2015
2016         /* write out the notes section */
2017         if (!write_note_info(&info, file, &foffset))
2018                 goto end_coredump;
2019
2020         if (elf_coredump_extra_notes_write(file, &foffset))
2021                 goto end_coredump;
2022
2023         /* Align to page */
2024         if (!dump_seek(file, dataoff - foffset))
2025                 goto end_coredump;
2026
2027         for (vma = first_vma(current, gate_vma); vma != NULL;
2028                         vma = next_vma(vma, gate_vma)) {
2029                 unsigned long addr;
2030                 unsigned long end;
2031
2032                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2033
2034                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2035                         struct page *page;
2036                         int stop;
2037
2038                         page = get_dump_page(addr);
2039                         if (page) {
2040                                 void *kaddr = kmap(page);
2041                                 stop = ((size += PAGE_SIZE) > limit) ||
2042                                         !dump_write(file, kaddr, PAGE_SIZE);
2043                                 kunmap(page);
2044                                 page_cache_release(page);
2045                         } else
2046                                 stop = !dump_seek(file, PAGE_SIZE);
2047                         if (stop)
2048                                 goto end_coredump;
2049                 }
2050         }
2051
2052 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2053         ELF_CORE_WRITE_EXTRA_DATA;
2054 #endif
2055
2056 end_coredump:
2057         set_fs(fs);
2058
2059 cleanup:
2060         free_note_info(&info);
2061         kfree(elf);
2062 out:
2063         return has_dumped;
2064 }
2065
2066 #endif          /* USE_ELF_CORE_DUMP */
2067
2068 static int __init init_elf_binfmt(void)
2069 {
2070         return register_binfmt(&elf_format);
2071 }
2072
2073 static void __exit exit_elf_binfmt(void)
2074 {
2075         /* Remove the COFF and ELF loaders. */
2076         unregister_binfmt(&elf_format);
2077 }
2078
2079 core_initcall(init_elf_binfmt);
2080 module_exit(exit_elf_binfmt);
2081 MODULE_LICENSE("GPL");