coredump: elf_core_dump: skip kernel threads
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
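
/*
 * Worked example (added for clarity; not in the original source): with a
 * 4 KiB ELF_MIN_ALIGN, a virtual address of 0x0804a123 gives
 * ELF_PAGESTART  -> 0x0804a000 (round down to the page base),
 * ELF_PAGEOFFSET -> 0x123      (the offset within the page), and
 * ELF_PAGEALIGN  -> 0x0804b000 (round up to the next page boundary).
 */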

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
        .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
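
/*
 * Note (added for clarity): do_brk() returns the start address on
 * success, or a negative errno cast to unsigned long on failure.  Error
 * values therefore land at the very top of the address space, so
 * BAD_ADDR() catches them along with any mapping beyond TASK_SIZE.
 */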

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
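
/*
 * Illustrative example (added; addresses are hypothetical): if a
 * segment's file data ends at 0x0804a123, ELF_PAGEOFFSET() is 0x123 and
 * padzero() clears the remaining 0xedd bytes up to the 0x0804b000 page
 * boundary - the tail of the page that mmap filled from the file but
 * that belongs to the zero-initialized bss.
 */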

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
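
/*
 * Clarifying note (added): on the usual downward-growing stack,
 * STACK_ALLOC(p, len) moves p down by len bytes and returns the new,
 * lower address as the start of the allocation; on CONFIG_STACK_GROWSUP
 * targets the allocation starts at the old value of p instead.
 * STACK_ROUND() keeps the resulting stack pointer 16-byte aligned, as
 * most ABIs require at process entry.
 */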

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        const char *k_platform = ELF_PLATFORM;
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        struct task_struct *tsk = current;
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, tsk->uid);
        NEW_AUX_ENT(AT_EUID, tsk->euid);
        NEW_AUX_ENT(AT_GID, tsk->gid);
        NEW_AUX_ENT(AT_EGID, tsk->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
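
/*
 * Layout sketch (added; exact addresses vary with the architecture and
 * stack randomization).  After create_elf_tables() the new image sees,
 * from the final stack pointer upward:
 *
 *      argc
 *      argv[0] ... argv[argc-1], NULL
 *      envp[0] ... envp[envc-1], NULL
 *      auxv id/value pairs, terminated by AT_NULL
 *      argument, environment and platform strings near the stack top
 */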

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

#endif /* !elf_map */
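
/*
 * Worked example (added; sizes are hypothetical): for an interpreter
 * whose PT_LOAD segments span total_size = 0x24000 bytes, the first
 * segment (say 0x1c000 bytes) is mapped by reserving the full 0x24000
 * window in one do_mmap() call and trimming the trailing 0x8000 with
 * do_munmap().  Later segments are then mapped MAP_FIXED inside the
 * reserved window, so randomization cannot drop an unrelated mapping
 * into the gap.
 */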

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
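
/*
 * Example (added; values are hypothetical): for two PT_LOAD entries with
 * p_vaddr/p_memsz of 0x0/0x1c000 and 0x1d000/0x7000, this returns
 * 0x1d000 + 0x7000 - 0x0 = 0x24000: the span from the page base of the
 * first segment to the end of the last one, holes included.
 */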

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
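
/*
 * Worked example (added): with 4 KiB pages (PAGE_SHIFT == 12), the
 * default STACK_RND_MASK is 0x7ff, so the random offset is an 11-bit
 * page count of up to 0x7ff << 12 = 8 MiB of virtual address space
 * below STACK_TOP (above it on upward-growing stacks).
 */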

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int elf_exec_fileno;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        retval = get_unused_fd();
        if (retval < 0)
                goto out_free_ph;
        get_file(bprm->file);
        fd_install(elf_exec_fileno = retval, bprm->file);

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_file;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_file;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure the path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex, 0);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex, 0);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex, 0);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        sys_close(elf_exec_fileno);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        if (unlikely(current->ptrace & PT_PTRACED)) {
                if (current->ptrace & PT_TRACE_EXEC)
                        ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
                else
                        send_sig(SIGTRAP, current, 0);
        }
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_file:
        sys_close(elf_exec_fileno);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zero page on a short write */
                                free_page((unsigned long)buf);
                                return 0;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}
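
/*
 * Note (added for clarity): on a seekable dump target (a regular file),
 * dump_seek() advances the file position and leaves a gap that later
 * writes turn into a hole, so untouched regions cost no disk space.  A
 * non-seekable target, such as a pipe to a core-dump helper, takes the
 * fallback path and receives explicit zero pages instead.
 */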

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) && vma->vm_pgoff == 0) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                if (get_user(word, header) == 0 && word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
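
/*
 * Usage note (added): the MMF_DUMP_* bits tested by FILTER() form the
 * per-process core dump filter and can be set from userspace; for
 * example, "echo 0x3 > /proc/<pid>/coredump_filter" restricts the dump
 * to anonymous private and anonymous shared mappings.
 */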

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
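
/*
 * Worked example (added; the payload size is hypothetical): a note named
 * "CORE" carrying a 336-byte descriptor takes sizeof(struct elf_note)
 * (12 bytes for namesz, descsz and type) + roundup(5, 4) = 8 bytes of
 * name + roundup(336, 4) = 336 bytes of data, i.e. 356 bytes in the
 * file.  writenote() below emits exactly these pieces with the same
 * 4-byte padding.
 */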

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while (0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                /*
                 * This is the record for the group leader.  Add in the
                 * cumulative times of previous dead threads.  This total
                 * won't include the time of each live thread whose state
                 * is included in the core dump.  The final total reported
                 * to our parent process when it calls wait4 will include
                 * those sums as well as the little bit more time it takes
                 * this and each other thread to finish dying after the
                 * core dump synchronization phase.
                 */
                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
                                   &prstatus->pr_utime);
                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
                                   &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ - 1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        SET_UID(psinfo->pr_uid, p->uid);
        SET_GID(psinfo->pr_gid, p->gid);
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
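
/*
 * Note (added for clarity): the do/while above walks saved_auxv one
 * id/value pair at a time and stops after the pair whose id is AT_NULL,
 * so the terminator itself is included in the NT_AUXV note, mirroring
 * the vector that create_elf_tables() placed on the stack.
 */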
1382
1383 #ifdef CORE_DUMP_USE_REGSET
1384 #include <linux/regset.h>
1385
1386 struct elf_thread_core_info {
1387         struct elf_thread_core_info *next;
1388         struct task_struct *task;
1389         struct elf_prstatus prstatus;
1390         struct memelfnote notes[0];
1391 };
1392
1393 struct elf_note_info {
1394         struct elf_thread_core_info *thread;
1395         struct memelfnote psinfo;
1396         struct memelfnote auxv;
1397         size_t size;
1398         int thread_notes;
1399 };
1400
1401 /*
1402  * When a regset has a writeback hook, we call it on each thread before
1403  * dumping user memory.  On register window machines, this makes sure the
1404  * user memory backing the register data is up to date before we read it.
1405  */
1406 static void do_thread_regset_writeback(struct task_struct *task,
1407                                        const struct user_regset *regset)
1408 {
1409         if (regset->writeback)
1410                 regset->writeback(task, regset, 1);
1411 }
1412
1413 static int fill_thread_core_info(struct elf_thread_core_info *t,
1414                                  const struct user_regset_view *view,
1415                                  long signr, size_t *total)
1416 {
1417         unsigned int i;
1418
1419         /*
1420          * NT_PRSTATUS is the one special case, because the regset data
1421          * goes into the pr_reg field inside the note contents, rather
1422          * than being the whole note contents.  We fill the rest in here.
1423          * We assume that regset 0 is NT_PRSTATUS.
1424          */
1425         fill_prstatus(&t->prstatus, t->task, signr);
1426         (void) view->regsets[0].get(t->task, &view->regsets[0],
1427                                     0, sizeof(t->prstatus.pr_reg),
1428                                     &t->prstatus.pr_reg, NULL);
1429
1430         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1431                   sizeof(t->prstatus), &t->prstatus);
1432         *total += notesize(&t->notes[0]);
1433
1434         do_thread_regset_writeback(t->task, &view->regsets[0]);
1435
1436         /*
1437          * Each other regset might generate a note too.  For each regset
1438          * that has no core_note_type or is inactive, we leave t->notes[i]
1439          * all zero and we'll know to skip writing it later.
1440          */
1441         for (i = 1; i < view->n; ++i) {
1442                 const struct user_regset *regset = &view->regsets[i];
1443                 do_thread_regset_writeback(t->task, regset);
1444                 if (regset->core_note_type &&
1445                     (!regset->active || regset->active(t->task, regset))) {
1446                         int ret;
1447                         size_t size = regset->n * regset->size;
1448                         void *data = kmalloc(size, GFP_KERNEL);
1449                         if (unlikely(!data))
1450                                 return 0;
1451                         ret = regset->get(t->task, regset,
1452                                           0, size, data, NULL);
1453                         if (unlikely(ret))
1454                                 kfree(data);
1455                         else {
1456                                 if (regset->core_note_type != NT_PRFPREG)
1457                                         fill_note(&t->notes[i], "LINUX",
1458                                                   regset->core_note_type,
1459                                                   size, data);
1460                                 else {
1461                                         t->prstatus.pr_fpvalid = 1;
1462                                         fill_note(&t->notes[i], "CORE",
1463                                                   NT_PRFPREG, size, data);
1464                                 }
1465                                 *total += notesize(&t->notes[i]);
1466                         }
1467                 }
1468         }
1469
1470         return 1;
1471 }
1472
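On the consumer side, a debugger finds these notes by walking the PT_NOTE
segment: each note is an Elf64_Nhdr, then the name ("CORE" or "LINUX"
here), then the descriptor, with name and descriptor each padded to 4
bytes.  A minimal 64-bit walker -- error handling abbreviated, core file
taken from argv[1] -- might look like:

    #include <elf.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
            Elf64_Ehdr eh;
            Elf64_Phdr ph;
            FILE *f;
            int i;

            if (argc < 2 || !(f = fopen(argv[1], "rb")))
                    return 1;
            if (fread(&eh, sizeof(eh), 1, f) != 1)
                    return 1;

            for (i = 0; i < eh.e_phnum; i++) {
                    char *buf;
                    size_t off = 0;

                    fseek(f, eh.e_phoff + i * eh.e_phentsize, SEEK_SET);
                    if (fread(&ph, sizeof(ph), 1, f) != 1)
                            return 1;
                    if (ph.p_type != PT_NOTE)
                            continue;

                    buf = malloc(ph.p_filesz);
                    fseek(f, ph.p_offset, SEEK_SET);
                    if (!buf || fread(buf, 1, ph.p_filesz, f) != ph.p_filesz)
                            return 1;

                    while (off + sizeof(Elf64_Nhdr) <= ph.p_filesz) {
                            Elf64_Nhdr *nh = (Elf64_Nhdr *)(buf + off);
                            const char *name = buf + off + sizeof(*nh);

                            printf("name=%.*s type=%u descsz=%u\n",
                                   (int)nh->n_namesz, name,
                                   nh->n_type, nh->n_descsz);
                            off += sizeof(*nh);
                            off += (nh->n_namesz + 3) & ~3u;  /* pad name */
                            off += (nh->n_descsz + 3) & ~3u;  /* pad desc */
                    }
                    free(buf);
            }
            fclose(f);
            return 0;
    }
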
1473 static int fill_note_info(struct elfhdr *elf, int phdrs,
1474                           struct elf_note_info *info,
1475                           long signr, struct pt_regs *regs)
1476 {
1477         struct task_struct *dump_task = current;
1478         const struct user_regset_view *view = task_user_regset_view(dump_task);
1479         struct elf_thread_core_info *t;
1480         struct elf_prpsinfo *psinfo;
1481         struct task_struct *g, *p;
1482         unsigned int i;
1483
1484         info->size = 0;
1485         info->thread = NULL;
1486
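        /*
         * Call fill_note() even when the allocation fails, so that
         * free_note_info() always sees a well-defined (possibly NULL)
         * ->data pointer on the error path.
         */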
1487         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1488         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1489
1490         if (psinfo == NULL)
1491                 return 0;
1492
1493         /*
1494          * Figure out how many notes we're going to need for each thread.
1495          */
1496         info->thread_notes = 0;
1497         for (i = 0; i < view->n; ++i)
1498                 if (view->regsets[i].core_note_type != 0)
1499                         ++info->thread_notes;
1500
1501         /*
1502          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1503          * since it is our one special case.
1504          */
1505         if (unlikely(info->thread_notes == 0) ||
1506             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1507                 WARN_ON(1);
1508                 return 0;
1509         }
1510
1511         /*
1512          * Initialize the ELF file header.
1513          */
1514         fill_elf_header(elf, phdrs,
1515                         view->e_machine, view->e_flags, view->ei_osabi);
1516
1517         /*
1518          * Allocate a structure for each thread.
1519          */
1520         rcu_read_lock();
1521         do_each_thread(g, p)
1522                 if (p->mm == dump_task->mm) {
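                        /*
                         * Skip kernel threads that merely borrowed this
                         * mm via use_mm(); they have no user state worth
                         * dumping.
                         */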
1523                         if (p->flags & PF_KTHREAD)
1524                                 continue;
1525
1526                         t = kzalloc(offsetof(struct elf_thread_core_info,
1527                                              notes[info->thread_notes]),
1528                                     GFP_ATOMIC);
1529                         if (unlikely(!t)) {
1530                                 rcu_read_unlock();
1531                                 return 0;
1532                         }
1533                         t->task = p;
1534                         if (p == dump_task || !info->thread) {
1535                                 t->next = info->thread;
1536                                 info->thread = t;
1537                         } else {
1538                                 /*
1539                                  * Make sure to keep the original task at
1540                                  * the head of the list.
1541                                  */
1542                                 t->next = info->thread->next;
1543                                 info->thread->next = t;
1544                         }
1545                 }
1546         while_each_thread(g, p);
1547         rcu_read_unlock();
1548
1549         /*
1550          * Now fill in each thread's information.
1551          */
1552         for (t = info->thread; t != NULL; t = t->next)
1553                 if (!fill_thread_core_info(t, view, signr, &info->size))
1554                         return 0;
1555
1556         /*
1557          * Fill in the two process-wide notes.
1558          */
1559         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1560         info->size += notesize(&info->psinfo);
1561
1562         fill_auxv_note(&info->auxv, current->mm);
1563         info->size += notesize(&info->auxv);
1564
1565         return 1;
1566 }
1567
1568 static size_t get_note_info_size(struct elf_note_info *info)
1569 {
1570         return info->size;
1571 }
1572
1573 /*
1574  * Write all the notes for each thread.  When writing the first thread, the
1575  * process-wide notes are interleaved after the first thread-specific note.
1576  */
1577 static int write_note_info(struct elf_note_info *info,
1578                            struct file *file, loff_t *foffset)
1579 {
1580         bool first = true;
1581         struct elf_thread_core_info *t = info->thread;
1582
1583         do {
1584                 int i;
1585
1586                 if (!writenote(&t->notes[0], file, foffset))
1587                         return 0;
1588
1589                 if (first && !writenote(&info->psinfo, file, foffset))
1590                         return 0;
1591                 if (first && !writenote(&info->auxv, file, foffset))
1592                         return 0;
1593
1594                 for (i = 1; i < info->thread_notes; ++i)
1595                         if (t->notes[i].data &&
1596                             !writenote(&t->notes[i], file, foffset))
1597                                 return 0;
1598
1599                 first = false;
1600                 t = t->next;
1601         } while (t);
1602
1603         return 1;
1604 }
1605
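For a two-thread process the loop above therefore emits notes in this
order (an illustration of the code's behavior, not output from any real
dump):

    NT_PRSTATUS     initial thread
    NT_PRPSINFO     process-wide, only after the first thread's status
    NT_AUXV         process-wide, only after the first thread's status
    (further regset notes for the initial thread, if any)
    NT_PRSTATUS     second thread
    (further regset notes for the second thread, if any)
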
1606 static void free_note_info(struct elf_note_info *info)
1607 {
1608         struct elf_thread_core_info *threads = info->thread;
1609         while (threads) {
1610                 unsigned int i;
1611                 struct elf_thread_core_info *t = threads;
1612                 threads = t->next;
1613                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1614                 for (i = 1; i < info->thread_notes; ++i)
1615                         kfree(t->notes[i].data);
1616                 kfree(t);
1617         }
1618         kfree(info->psinfo.data);
1619 }
1620
1621 #else
1622
1623 /* Here is the structure in which status of each thread is captured. */
1624 struct elf_thread_status
1625 {
1626         struct list_head list;
1627         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1628         elf_fpregset_t fpu;             /* NT_PRFPREG */
1629         struct task_struct *thread;
1630 #ifdef ELF_CORE_COPY_XFPREGS
1631         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1632 #endif
1633         struct memelfnote notes[3];
1634         int num_notes;
1635 };
1636
1637 /*
1638  * In order to add the per-thread information to the ELF core file, we
1639  * keep a linked list of each thread's pr_status and then create a
1640  * single section for them in the final core file.
1641  */
1642 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1643 {
1644         int sz = 0;
1645         struct task_struct *p = t->thread;
1646         t->num_notes = 0;
1647
1648         fill_prstatus(&t->prstatus, p, signr);
1649         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1650         
1651         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1652                   &(t->prstatus));
1653         t->num_notes++;
1654         sz += notesize(&t->notes[0]);
1655
1656         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657                                                                 &t->fpu))) {
1658                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1659                           &(t->fpu));
1660                 t->num_notes++;
1661                 sz += notesize(&t->notes[1]);
1662         }
1663
1664 #ifdef ELF_CORE_COPY_XFPREGS
1665         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1666                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1667                           sizeof(t->xfpu), &t->xfpu);
1668                 t->num_notes++;
1669                 sz += notesize(&t->notes[2]);
1670         }
1671 #endif  
1672         return sz;
1673 }
1674
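The sizes summed above come from notesize(): a note occupies its header
plus the name and descriptor, each rounded up to a 4-byte boundary.  A
standalone sketch of that arithmetic -- the 336-byte descriptor is an
assumed sizeof(struct elf_prstatus) for x86-64, for illustration only:

    #include <elf.h>
    #include <stdio.h>

    /* on-disk size of one note, mirroring the kernel's notesize() */
    static unsigned int note_size(unsigned int namesz, unsigned int descsz)
    {
            return sizeof(Elf64_Nhdr)
                    + ((namesz + 3) & ~3u)          /* name, padded */
                    + ((descsz + 3) & ~3u);         /* desc, padded */
    }

    int main(void)
    {
            /* "CORE" plus its NUL is 5 bytes */
            printf("%u\n", note_size(5, 336));      /* 12 + 8 + 336 = 356 */
            return 0;
    }
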
1675 struct elf_note_info {
1676         struct memelfnote *notes;
1677         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1678         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1679         struct list_head thread_list;
1680         elf_fpregset_t *fpu;
1681 #ifdef ELF_CORE_COPY_XFPREGS
1682         elf_fpxregset_t *xfpu;
1683 #endif
1684         int thread_status_size;
1685         int numnote;
1686 };
1687
1688 static int fill_note_info(struct elfhdr *elf, int phdrs,
1689                           struct elf_note_info *info,
1690                           long signr, struct pt_regs *regs)
1691 {
1692 #define NUM_NOTES       6
1693         struct list_head *t;
1694         struct task_struct *g, *p;
1695
1696         info->notes = NULL;
1697         info->prstatus = NULL;
1698         info->psinfo = NULL;
1699         info->fpu = NULL;
1700 #ifdef ELF_CORE_COPY_XFPREGS
1701         info->xfpu = NULL;
1702 #endif
1703         INIT_LIST_HEAD(&info->thread_list);
1704
1705         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1706                               GFP_KERNEL);
1707         if (!info->notes)
1708                 return 0;
1709         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1710         if (!info->psinfo)
1711                 return 0;
1712         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1713         if (!info->prstatus)
1714                 return 0;
1715         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1716         if (!info->fpu)
1717                 return 0;
1718 #ifdef ELF_CORE_COPY_XFPREGS
1719         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1720         if (!info->xfpu)
1721                 return 0;
1722 #endif
1723
1724         info->thread_status_size = 0;
1725         if (signr) {
1726                 struct elf_thread_status *ets;
1727                 rcu_read_lock();
1728                 do_each_thread(g, p)
1729                         if (current->mm == p->mm && current != p) {
1730                                 if (p->flags & PF_KTHREAD)
1731                                         continue;
1732
1733                                 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1734                                 if (!ets) {
1735                                         rcu_read_unlock();
1736                                         return 0;
1737                                 }
1738                                 ets->thread = p;
1739                                 list_add(&ets->list, &info->thread_list);
1740                         }
1741                 while_each_thread(g, p);
1742                 rcu_read_unlock();
1743                 list_for_each(t, &info->thread_list) {
1744                         int sz;
1745
1746                         ets = list_entry(t, struct elf_thread_status, list);
1747                         sz = elf_dump_thread_status(signr, ets);
1748                         info->thread_status_size += sz;
1749                 }
1750         }
1751         /* now collect the dump for the current task */
1752         memset(info->prstatus, 0, sizeof(*info->prstatus));
1753         fill_prstatus(info->prstatus, current, signr);
1754         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1755
1756         /* Set up header */
1757         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1758
1759         /*
1760          * Set up the notes in similar form to SVR4 core dumps made
1761          * with info from their /proc.
1762          */
1763
1764         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1765                   sizeof(*info->prstatus), info->prstatus);
1766         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1767         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1768                   sizeof(*info->psinfo), info->psinfo);
1769
1770         info->numnote = 2;
1771
1772         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1773
1774         /* Try to dump the FPU. */
1775         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1776                                                                info->fpu);
1777         if (info->prstatus->pr_fpvalid)
1778                 fill_note(info->notes + info->numnote++,
1779                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1780 #ifdef ELF_CORE_COPY_XFPREGS
1781         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1782                 fill_note(info->notes + info->numnote++,
1783                           "LINUX", ELF_CORE_XFPREG_TYPE,
1784                           sizeof(*info->xfpu), info->xfpu);
1785 #endif
1786
1787         return 1;
1788
1789 #undef NUM_NOTES
1790 }
1791
1792 static size_t get_note_info_size(struct elf_note_info *info)
1793 {
1794         int sz = 0;
1795         int i;
1796
1797         for (i = 0; i < info->numnote; i++)
1798                 sz += notesize(info->notes + i);
1799
1800         sz += info->thread_status_size;
1801
1802         return sz;
1803 }
1804
1805 static int write_note_info(struct elf_note_info *info,
1806                            struct file *file, loff_t *foffset)
1807 {
1808         int i;
1809         struct list_head *t;
1810
1811         for (i = 0; i < info->numnote; i++)
1812                 if (!writenote(info->notes + i, file, foffset))
1813                         return 0;
1814
1815         /* write out the thread status notes section */
1816         list_for_each(t, &info->thread_list) {
1817                 struct elf_thread_status *tmp =
1818                                 list_entry(t, struct elf_thread_status, list);
1819
1820                 for (i = 0; i < tmp->num_notes; i++)
1821                         if (!writenote(&tmp->notes[i], file, foffset))
1822                                 return 0;
1823         }
1824
1825         return 1;
1826 }
1827
1828 static void free_note_info(struct elf_note_info *info)
1829 {
1830         while (!list_empty(&info->thread_list)) {
1831                 struct list_head *tmp = info->thread_list.next;
1832                 list_del(tmp);
1833                 kfree(list_entry(tmp, struct elf_thread_status, list));
1834         }
1835
1836         kfree(info->prstatus);
1837         kfree(info->psinfo);
1838         kfree(info->notes);
1839         kfree(info->fpu);
1840 #ifdef ELF_CORE_COPY_XFPREGS
1841         kfree(info->xfpu);
1842 #endif
1843 }
1844
1845 #endif
1846
1847 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1848                                         struct vm_area_struct *gate_vma)
1849 {
1850         struct vm_area_struct *ret = tsk->mm->mmap;
1851
1852         if (ret)
1853                 return ret;
1854         return gate_vma;
1855 }
1856 /*
1857  * Helper function for iterating across a vma list.  It ensures that the caller
1858  * will visit `gate_vma' prior to terminating the search.
1859  */
1860 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1861                                         struct vm_area_struct *gate_vma)
1862 {
1863         struct vm_area_struct *ret;
1864
1865         ret = this_vma->vm_next;
1866         if (ret)
1867                 return ret;
1868         if (this_vma == gate_vma)
1869                 return NULL;
1870         return gate_vma;
1871 }
1872
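A tiny userspace model of the first_vma()/next_vma() contract -- the
struct vma here is a hypothetical stand-in -- shows the gate vma being
visited exactly once, after the real list is exhausted:

    #include <stdio.h>

    struct vma { struct vma *next; const char *name; };

    static struct vma *first(struct vma *head, struct vma *gate)
    {
            return head ? head : gate;
    }

    static struct vma *next(struct vma *v, struct vma *gate)
    {
            if (v->next)
                    return v->next;
            return v == gate ? NULL : gate;
    }

    int main(void)
    {
            struct vma gate = { NULL, "[gate]" };
            struct vma b = { NULL, "b" }, a = { &b, "a" };
            struct vma *v;

            for (v = first(&a, &gate); v; v = next(v, &gate))
                    printf("%s\n", v->name);        /* a, b, [gate] */
            return 0;
    }
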
1873 /*
1874  * Actual dumper
1875  *
1876  * This is a two-pass process; first we find the offsets of the bits,
1877  * and then they are actually written out.  If we run out of core limit
1878  * we just truncate.
1879  */
1880 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1881 {
1882         int has_dumped = 0;
1883         mm_segment_t fs;
1884         int segs;
1885         size_t size = 0;
1886         struct vm_area_struct *vma, *gate_vma;
1887         struct elfhdr *elf = NULL;
1888         loff_t offset = 0, dataoff, foffset;
1889         unsigned long mm_flags;
1890         struct elf_note_info info;
1891
1892         /*
1893          * We no longer stop all VM operations.
1894          * 
1895          * This is because any process that could possibly change map_count
1896          * or the mmap / vma pages is now blocked in do_exit on current
1897          * finishing this core dump.
1898          *
1899          * Only ptrace can touch these memory addresses, but it doesn't change
1900          * the map_count or the pages allocated. So no possibility of crashing
1901          * exists while dumping the mm->vm_next areas to the core file.
1902          */
1903   
1904         /* alloc memory for large data structures: too large to be on stack */
1905         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1906         if (!elf)
1907                 goto out;
1908         
1909         segs = current->mm->map_count;
1910 #ifdef ELF_CORE_EXTRA_PHDRS
1911         segs += ELF_CORE_EXTRA_PHDRS;
1912 #endif
1913
1914         gate_vma = get_gate_vma(current);
1915         if (gate_vma != NULL)
1916                 segs++;
1917
1918         /*
1919          * Collect all the non-memory information about the process for the
1920          * notes.  This also sets up the file header.
1921          */
1922         if (!fill_note_info(elf, segs + 1, /* including notes section */
1923                             &info, signr, regs))
1924                 goto cleanup;
1925
1926         has_dumped = 1;
1927         current->flags |= PF_DUMPCORE;
1928   
1929         fs = get_fs();
1930         set_fs(KERNEL_DS);
1931
1932         DUMP_WRITE(elf, sizeof(*elf));
1933         offset += sizeof(*elf);                         /* Elf header */
1934         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1935         foffset = offset;
1936
1937         /* Write notes phdr entry */
1938         {
1939                 struct elf_phdr phdr;
1940                 size_t sz = get_note_info_size(&info);
1941
1942                 sz += elf_coredump_extra_notes_size();
1943
1944                 fill_elf_note_phdr(&phdr, sz, offset);
1945                 offset += sz;
1946                 DUMP_WRITE(&phdr, sizeof(phdr));
1947         }
1948
1949         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1950
1951         /*
1952          * We must use the same mm->flags while dumping core to avoid
1953          * inconsistency between the program headers and bodies, otherwise an
1954          * unusable core file can be generated.
1955          */
1956         mm_flags = current->mm->flags;
1957
1958         /* Write program headers for segments dump */
1959         for (vma = first_vma(current, gate_vma); vma != NULL;
1960                         vma = next_vma(vma, gate_vma)) {
1961                 struct elf_phdr phdr;
1962
1963                 phdr.p_type = PT_LOAD;
1964                 phdr.p_offset = offset;
1965                 phdr.p_vaddr = vma->vm_start;
1966                 phdr.p_paddr = 0;
1967                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1968                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1969                 offset += phdr.p_filesz;
1970                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1971                 if (vma->vm_flags & VM_WRITE)
1972                         phdr.p_flags |= PF_W;
1973                 if (vma->vm_flags & VM_EXEC)
1974                         phdr.p_flags |= PF_X;
1975                 phdr.p_align = ELF_EXEC_PAGESIZE;
1976
1977                 DUMP_WRITE(&phdr, sizeof(phdr));
1978         }
1979
1980 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1981         ELF_CORE_WRITE_EXTRA_PHDRS;
1982 #endif
1983
1984         /* write out the notes section */
1985         if (!write_note_info(&info, file, &foffset))
1986                 goto end_coredump;
1987
1988         if (elf_coredump_extra_notes_write(file, &foffset))
1989                 goto end_coredump;
1990
1991         /* Align to page */
1992         DUMP_SEEK(dataoff - foffset);
1993
1994         for (vma = first_vma(current, gate_vma); vma != NULL;
1995                         vma = next_vma(vma, gate_vma)) {
1996                 unsigned long addr;
1997                 unsigned long end;
1998
1999                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2000
2001                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2002                         struct page *page;
2003                         struct vm_area_struct *tmp_vma;
2004
2005                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2006                                                 &page, &tmp_vma) <= 0) {
2007                                 DUMP_SEEK(PAGE_SIZE);
2008                         } else {
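                                /*
                                 * An untouched anonymous page maps the
                                 * shared zero page; seek past it so the
                                 * core file stays sparse instead of
                                 * carrying a page of zeroes.
                                 */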
2009                                 if (page == ZERO_PAGE(0)) {
2010                                         if (!dump_seek(file, PAGE_SIZE)) {
2011                                                 page_cache_release(page);
2012                                                 goto end_coredump;
2013                                         }
2014                                 } else {
2015                                         void *kaddr;
2016                                         flush_cache_page(tmp_vma, addr,
2017                                                          page_to_pfn(page));
2018                                         kaddr = kmap(page);
2019                                         if ((size += PAGE_SIZE) > limit ||
2020                                             !dump_write(file, kaddr,
2021                                             PAGE_SIZE)) {
2022                                                 kunmap(page);
2023                                                 page_cache_release(page);
2024                                                 goto end_coredump;
2025                                         }
2026                                         kunmap(page);
2027                                 }
2028                                 page_cache_release(page);
2029                         }
2030                 }
2031         }
2032
2033 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2034         ELF_CORE_WRITE_EXTRA_DATA;
2035 #endif
2036
2037 end_coredump:
2038         set_fs(fs);
2039
2040 cleanup:
2041         free_note_info(&info);
2042         kfree(elf);
2043 out:
2044         return has_dumped;
2045 }
2046
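As a back-of-envelope check of the first pass's offset bookkeeping --
segs, the note size, and the 4096-byte page are assumed example values --
the layout arithmetic runs:

    #include <elf.h>
    #include <stdio.h>

    #define PAGE 4096UL

    int main(void)
    {
            unsigned long segs = 12;        /* map_count, plus gate vma */
            unsigned long notes_sz = 3000;  /* get_note_info_size(), say */
            unsigned long off = sizeof(Elf64_Ehdr);

            /* one extra program header describes the notes */
            off += (segs + 1) * sizeof(Elf64_Phdr);
            printf("notes begin at %lu\n", off);

            off += notes_sz;
            /* segment data starts on the next page boundary */
            printf("data begins at %lu\n", (off + PAGE - 1) & ~(PAGE - 1));
            return 0;
    }
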
2047 #endif          /* USE_ELF_CORE_DUMP */
2048
2049 static int __init init_elf_binfmt(void)
2050 {
2051         return register_binfmt(&elf_format);
2052 }
2053
2054 static void __exit exit_elf_binfmt(void)
2055 {
2056         /* Remove the ELF loader. */
2057         unregister_binfmt(&elf_format);
2058 }
2059
2060 core_initcall(init_elf_binfmt);
2061 module_exit(exit_elf_binfmt);
2062 MODULE_LICENSE("GPL");