vfs: remove extraneous NULL d_inode check from do_filp_open
[safe/jmp/linux-2.6] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <asm/uaccess.h>
35 #include <asm/param.h>
36 #include <asm/page.h>
37
/*
 * Forward declarations: the two loader entry points referenced by
 * elf_format, plus the segment-mapping helper.
 */
static int load_elf_binary(struct linux_binprm *bprm,
                           struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
                             struct elf_phdr *eppnt, int prot, int type,
                             unsigned long total_size);
42
/*
 * Core dumping is optional.  When CONFIG_ELF_CORE is not set the name
 * resolves to NULL, so elf_format.core_dump is NULL and no dump is tried.
 */
#ifndef CONFIG_ELF_CORE
#define elf_core_dump   NULL
#else
static int elf_core_dump(long signr, struct pt_regs *regs,
                         struct file *file, unsigned long limit);
#endif
52
/* Alignment granule for ELF mappings: the larger of the two page sizes. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
#define ELF_MIN_ALIGN   PAGE_SIZE
#else
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#endif

/* Use 0 unless ELF_CORE_EFLAGS has already been defined. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS 0
#endif
62
/*
 * Address helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART(_v)  - _v rounded down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET(_v) - offset of _v within its ELF_MIN_ALIGN unit
 *   ELF_PAGEALIGN(_v)  - _v rounded up to an ELF_MIN_ALIGN boundary
 *
 * Both rounding masks are widened to unsigned long before being inverted,
 * so the high bits of a 64-bit address survive even if ELF_MIN_ALIGN has
 * a narrower unsigned type.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
66
67 static struct linux_binfmt elf_format = {
68                 .module         = THIS_MODULE,
69                 .load_binary    = load_elf_binary,
70                 .load_shlib     = load_elf_library,
71                 .core_dump      = elf_core_dump,
72                 .min_coredump   = ELF_EXEC_PAGESIZE,
73                 .hasvdso        = 1
74 };
75
/*
 * True when x, viewed as an unsigned long, lies at or above TASK_SIZE.
 * Callers in this file use it to detect failed do_brk()/do_mmap()/elf_map()
 * results.
 */
#define BAD_ADDR(x) (TASK_SIZE <= (unsigned long)(x))
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Helpers that make the stack manipulation below a little clearer.  Every
 * macro argument is parenthesized so that compound expressions (for
 * example "a + b") expand as the caller intends.
 *
 *   STACK_ADD(sp, items)   - sp viewed as an elf_addr_t pointer, moved by
 *                            'items' slots in the direction of stack growth.
 *   STACK_ROUND(sp, items) - sp moved by 'items' and aligned to 16 bytes
 *                            (rounded up on grows-up stacks, down otherwise);
 *                            the result is an unsigned long.
 *   STACK_ALLOC(sp, len)   - moves sp by 'len' in place and yields the start
 *                            of the region just reserved.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
/*
 * AT_BASE_PLATFORM names the "real" hardware/microarchitecture.  When the
 * architecture defines ELF_BASE_PLATFORM (in asm/elf.h) that string is
 * copied to the user stack the same way as AT_PLATFORM; otherwise it
 * defaults to NULL here.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 #ifndef elf_map
319
320 static unsigned long elf_map(struct file *filep, unsigned long addr,
321                 struct elf_phdr *eppnt, int prot, int type,
322                 unsigned long total_size)
323 {
324         unsigned long map_addr;
325         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
326         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
327         addr = ELF_PAGESTART(addr);
328         size = ELF_PAGEALIGN(size);
329
330         /* mmap() will return -EINVAL if given a zero size, but a
331          * segment with zero filesize is perfectly valid */
332         if (!size)
333                 return addr;
334
335         down_write(&current->mm->mmap_sem);
336         /*
337         * total_size is the size of the ELF (interpreter) image.
338         * The _first_ mmap needs to know the full size, otherwise
339         * randomization might put this image into an overlapping
340         * position with the ELF binary image. (since size < total_size)
341         * So we first map the 'big' image - and unmap the remainder at
342         * the end. (which unmap is needed for ELF images with holes.)
343         */
344         if (total_size) {
345                 total_size = ELF_PAGEALIGN(total_size);
346                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
347                 if (!BAD_ADDR(map_addr))
348                         do_munmap(current->mm, map_addr+size, total_size-size);
349         } else
350                 map_addr = do_mmap(filep, addr, size, prot, type, off);
351
352         up_write(&current->mm->mmap_sem);
353         return(map_addr);
354 }
355
356 #endif /* !elf_map */
357
358 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
359 {
360         int i, first_idx = -1, last_idx = -1;
361
362         for (i = 0; i < nr; i++) {
363                 if (cmds[i].p_type == PT_LOAD) {
364                         last_idx = i;
365                         if (first_idx == -1)
366                                 first_idx = i;
367                 }
368         }
369         if (first_idx == -1)
370                 return 0;
371
372         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
373                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
374 }
375
376
377 /* This is much more generalized than the library routine read function,
378    so we keep this separate.  Technically the library read function
379    is only provided so that we can read a.out libraries that have
380    an ELF header */
381
382 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
383                 struct file *interpreter, unsigned long *interp_map_addr,
384                 unsigned long no_base)
385 {
386         struct elf_phdr *elf_phdata;
387         struct elf_phdr *eppnt;
388         unsigned long load_addr = 0;
389         int load_addr_set = 0;
390         unsigned long last_bss = 0, elf_bss = 0;
391         unsigned long error = ~0UL;
392         unsigned long total_size;
393         int retval, i, size;
394
395         /* First of all, some simple consistency checks */
396         if (interp_elf_ex->e_type != ET_EXEC &&
397             interp_elf_ex->e_type != ET_DYN)
398                 goto out;
399         if (!elf_check_arch(interp_elf_ex))
400                 goto out;
401         if (!interpreter->f_op || !interpreter->f_op->mmap)
402                 goto out;
403
404         /*
405          * If the size of this structure has changed, then punt, since
406          * we will be doing the wrong thing.
407          */
408         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
409                 goto out;
410         if (interp_elf_ex->e_phnum < 1 ||
411                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
412                 goto out;
413
414         /* Now read in all of the header information */
415         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
416         if (size > ELF_MIN_ALIGN)
417                 goto out;
418         elf_phdata = kmalloc(size, GFP_KERNEL);
419         if (!elf_phdata)
420                 goto out;
421
422         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
423                              (char *)elf_phdata,size);
424         error = -EIO;
425         if (retval != size) {
426                 if (retval < 0)
427                         error = retval; 
428                 goto out_close;
429         }
430
431         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
432         if (!total_size) {
433                 error = -EINVAL;
434                 goto out_close;
435         }
436
437         eppnt = elf_phdata;
438         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
439                 if (eppnt->p_type == PT_LOAD) {
440                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
441                         int elf_prot = 0;
442                         unsigned long vaddr = 0;
443                         unsigned long k, map_addr;
444
445                         if (eppnt->p_flags & PF_R)
446                                 elf_prot = PROT_READ;
447                         if (eppnt->p_flags & PF_W)
448                                 elf_prot |= PROT_WRITE;
449                         if (eppnt->p_flags & PF_X)
450                                 elf_prot |= PROT_EXEC;
451                         vaddr = eppnt->p_vaddr;
452                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
453                                 elf_type |= MAP_FIXED;
454                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
455                                 load_addr = -vaddr;
456
457                         map_addr = elf_map(interpreter, load_addr + vaddr,
458                                         eppnt, elf_prot, elf_type, total_size);
459                         total_size = 0;
460                         if (!*interp_map_addr)
461                                 *interp_map_addr = map_addr;
462                         error = map_addr;
463                         if (BAD_ADDR(map_addr))
464                                 goto out_close;
465
466                         if (!load_addr_set &&
467                             interp_elf_ex->e_type == ET_DYN) {
468                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
469                                 load_addr_set = 1;
470                         }
471
472                         /*
473                          * Check to see if the section's size will overflow the
474                          * allowed task size. Note that p_filesz must always be
475                          * <= p_memsize so it's only necessary to check p_memsz.
476                          */
477                         k = load_addr + eppnt->p_vaddr;
478                         if (BAD_ADDR(k) ||
479                             eppnt->p_filesz > eppnt->p_memsz ||
480                             eppnt->p_memsz > TASK_SIZE ||
481                             TASK_SIZE - eppnt->p_memsz < k) {
482                                 error = -ENOMEM;
483                                 goto out_close;
484                         }
485
486                         /*
487                          * Find the end of the file mapping for this phdr, and
488                          * keep track of the largest address we see for this.
489                          */
490                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
491                         if (k > elf_bss)
492                                 elf_bss = k;
493
494                         /*
495                          * Do the same thing for the memory mapping - between
496                          * elf_bss and last_bss is the bss section.
497                          */
498                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
499                         if (k > last_bss)
500                                 last_bss = k;
501                 }
502         }
503
504         if (last_bss > elf_bss) {
505                 /*
506                  * Now fill out the bss section.  First pad the last page up
507                  * to the page boundary, and then perform a mmap to make sure
508                  * that there are zero-mapped pages up to and including the
509                  * last bss page.
510                  */
511                 if (padzero(elf_bss)) {
512                         error = -EFAULT;
513                         goto out_close;
514                 }
515
516                 /* What we have mapped so far */
517                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
518
519                 /* Map the last of the bss segment */
520                 down_write(&current->mm->mmap_sem);
521                 error = do_brk(elf_bss, last_bss - elf_bss);
522                 up_write(&current->mm->mmap_sem);
523                 if (BAD_ADDR(error))
524                         goto out_close;
525         }
526
527         error = load_addr;
528
529 out_close:
530         kfree(elf_phdata);
531 out:
532         return error;
533 }
534
535 /*
536  * These are the functions used to load ELF style executables and shared
537  * libraries.  There is no binary dependent code anywhere else.
538  */
539
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/*
 * Mask applied to the random stack offset before it is shifted left by
 * PAGE_SHIFT.  This default is used only when nothing has defined
 * STACK_RND_MASK already.
 */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
546
547 static unsigned long randomize_stack_top(unsigned long stack_top)
548 {
549         unsigned int random_variable = 0;
550
551         if ((current->flags & PF_RANDOMIZE) &&
552                 !(current->personality & ADDR_NO_RANDOMIZE)) {
553                 random_variable = get_random_int() & STACK_RND_MASK;
554                 random_variable <<= PAGE_SHIFT;
555         }
556 #ifdef CONFIG_STACK_GROWSUP
557         return PAGE_ALIGN(stack_top) + random_variable;
558 #else
559         return PAGE_ALIGN(stack_top) - random_variable;
560 #endif
561 }
562
563 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
564 {
565         struct file *interpreter = NULL; /* to shut gcc up */
566         unsigned long load_addr = 0, load_bias = 0;
567         int load_addr_set = 0;
568         char * elf_interpreter = NULL;
569         unsigned long error;
570         struct elf_phdr *elf_ppnt, *elf_phdata;
571         unsigned long elf_bss, elf_brk;
572         int retval, i;
573         unsigned int size;
574         unsigned long elf_entry;
575         unsigned long interp_load_addr = 0;
576         unsigned long start_code, end_code, start_data, end_data;
577         unsigned long reloc_func_desc = 0;
578         int executable_stack = EXSTACK_DEFAULT;
579         unsigned long def_flags = 0;
580         struct {
581                 struct elfhdr elf_ex;
582                 struct elfhdr interp_elf_ex;
583         } *loc;
584
585         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
586         if (!loc) {
587                 retval = -ENOMEM;
588                 goto out_ret;
589         }
590         
591         /* Get the exec-header */
592         loc->elf_ex = *((struct elfhdr *)bprm->buf);
593
594         retval = -ENOEXEC;
595         /* First of all, some simple consistency checks */
596         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
597                 goto out;
598
599         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
600                 goto out;
601         if (!elf_check_arch(&loc->elf_ex))
602                 goto out;
603         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
604                 goto out;
605
606         /* Now read in all of the header information */
607         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
608                 goto out;
609         if (loc->elf_ex.e_phnum < 1 ||
610                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
611                 goto out;
612         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
613         retval = -ENOMEM;
614         elf_phdata = kmalloc(size, GFP_KERNEL);
615         if (!elf_phdata)
616                 goto out;
617
618         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
619                              (char *)elf_phdata, size);
620         if (retval != size) {
621                 if (retval >= 0)
622                         retval = -EIO;
623                 goto out_free_ph;
624         }
625
626         elf_ppnt = elf_phdata;
627         elf_bss = 0;
628         elf_brk = 0;
629
630         start_code = ~0UL;
631         end_code = 0;
632         start_data = 0;
633         end_data = 0;
634
635         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
636                 if (elf_ppnt->p_type == PT_INTERP) {
637                         /* This is the program interpreter used for
638                          * shared libraries - for now assume that this
639                          * is an a.out format binary
640                          */
641                         retval = -ENOEXEC;
642                         if (elf_ppnt->p_filesz > PATH_MAX || 
643                             elf_ppnt->p_filesz < 2)
644                                 goto out_free_ph;
645
646                         retval = -ENOMEM;
647                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
648                                                   GFP_KERNEL);
649                         if (!elf_interpreter)
650                                 goto out_free_ph;
651
652                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
653                                              elf_interpreter,
654                                              elf_ppnt->p_filesz);
655                         if (retval != elf_ppnt->p_filesz) {
656                                 if (retval >= 0)
657                                         retval = -EIO;
658                                 goto out_free_interp;
659                         }
660                         /* make sure path is NULL terminated */
661                         retval = -ENOEXEC;
662                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663                                 goto out_free_interp;
664
665                         /*
666                          * The early SET_PERSONALITY here is so that the lookup
667                          * for the interpreter happens in the namespace of the 
668                          * to-be-execed image.  SET_PERSONALITY can select an
669                          * alternate root.
670                          *
671                          * However, SET_PERSONALITY is NOT allowed to switch
672                          * this task into the new images's memory mapping
673                          * policy - that is, TASK_SIZE must still evaluate to
674                          * that which is appropriate to the execing application.
675                          * This is because exit_mmap() needs to have TASK_SIZE
676                          * evaluate to the size of the old image.
677                          *
678                          * So if (say) a 64-bit application is execing a 32-bit
679                          * application it is the architecture's responsibility
680                          * to defer changing the value of TASK_SIZE until the
681                          * switch really is going to happen - do this in
682                          * flush_thread().      - akpm
683                          */
684                         SET_PERSONALITY(loc->elf_ex);
685
686                         interpreter = open_exec(elf_interpreter);
687                         retval = PTR_ERR(interpreter);
688                         if (IS_ERR(interpreter))
689                                 goto out_free_interp;
690
691                         /*
692                          * If the binary is not readable then enforce
693                          * mm->dumpable = 0 regardless of the interpreter's
694                          * permissions.
695                          */
696                         if (file_permission(interpreter, MAY_READ) < 0)
697                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
698
699                         retval = kernel_read(interpreter, 0, bprm->buf,
700                                              BINPRM_BUF_SIZE);
701                         if (retval != BINPRM_BUF_SIZE) {
702                                 if (retval >= 0)
703                                         retval = -EIO;
704                                 goto out_free_dentry;
705                         }
706
707                         /* Get the exec headers */
708                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
709                         break;
710                 }
711                 elf_ppnt++;
712         }
713
714         elf_ppnt = elf_phdata;
715         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
716                 if (elf_ppnt->p_type == PT_GNU_STACK) {
717                         if (elf_ppnt->p_flags & PF_X)
718                                 executable_stack = EXSTACK_ENABLE_X;
719                         else
720                                 executable_stack = EXSTACK_DISABLE_X;
721                         break;
722                 }
723
724         /* Some simple consistency checks for the interpreter */
725         if (elf_interpreter) {
726                 retval = -ELIBBAD;
727                 /* Not an ELF interpreter */
728                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
729                         goto out_free_dentry;
730                 /* Verify the interpreter has a valid arch */
731                 if (!elf_check_arch(&loc->interp_elf_ex))
732                         goto out_free_dentry;
733         } else {
734                 /* Executables without an interpreter also need a personality  */
735                 SET_PERSONALITY(loc->elf_ex);
736         }
737
738         /* Flush all traces of the currently running executable */
739         retval = flush_old_exec(bprm);
740         if (retval)
741                 goto out_free_dentry;
742
743         /* OK, This is the point of no return */
744         current->flags &= ~PF_FORKNOEXEC;
745         current->mm->def_flags = def_flags;
746
747         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
748            may depend on the personality.  */
749         SET_PERSONALITY(loc->elf_ex);
750         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
751                 current->personality |= READ_IMPLIES_EXEC;
752
753         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754                 current->flags |= PF_RANDOMIZE;
755         arch_pick_mmap_layout(current->mm);
756
757         /* Do this so that we can load the interpreter, if need be.  We will
758            change some of these later */
759         current->mm->free_area_cache = current->mm->mmap_base;
760         current->mm->cached_hole_size = 0;
761         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
762                                  executable_stack);
763         if (retval < 0) {
764                 send_sig(SIGKILL, current, 0);
765                 goto out_free_dentry;
766         }
767         
768         current->mm->start_stack = bprm->p;
769
770         /* Now we do a little grungy work by mmapping the ELF image into
771            the correct location in memory. */
772         for(i = 0, elf_ppnt = elf_phdata;
773             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
774                 int elf_prot = 0, elf_flags;
775                 unsigned long k, vaddr;
776
777                 if (elf_ppnt->p_type != PT_LOAD)
778                         continue;
779
780                 if (unlikely (elf_brk > elf_bss)) {
781                         unsigned long nbyte;
782                     
783                         /* There was a PT_LOAD segment with p_memsz > p_filesz
784                            before this one. Map anonymous pages, if needed,
785                            and clear the area.  */
786                         retval = set_brk (elf_bss + load_bias,
787                                           elf_brk + load_bias);
788                         if (retval) {
789                                 send_sig(SIGKILL, current, 0);
790                                 goto out_free_dentry;
791                         }
792                         nbyte = ELF_PAGEOFFSET(elf_bss);
793                         if (nbyte) {
794                                 nbyte = ELF_MIN_ALIGN - nbyte;
795                                 if (nbyte > elf_brk - elf_bss)
796                                         nbyte = elf_brk - elf_bss;
797                                 if (clear_user((void __user *)elf_bss +
798                                                         load_bias, nbyte)) {
799                                         /*
800                                          * This bss-zeroing can fail if the ELF
801                                          * file specifies odd protections. So
802                                          * we don't check the return value
803                                          */
804                                 }
805                         }
806                 }
807
808                 if (elf_ppnt->p_flags & PF_R)
809                         elf_prot |= PROT_READ;
810                 if (elf_ppnt->p_flags & PF_W)
811                         elf_prot |= PROT_WRITE;
812                 if (elf_ppnt->p_flags & PF_X)
813                         elf_prot |= PROT_EXEC;
814
815                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
816
817                 vaddr = elf_ppnt->p_vaddr;
818                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
819                         elf_flags |= MAP_FIXED;
820                 } else if (loc->elf_ex.e_type == ET_DYN) {
821                         /* Try and get dynamic programs out of the way of the
822                          * default mmap base, as well as whatever program they
823                          * might try to exec.  This is because the brk will
824                          * follow the loader, and is not movable.  */
825 #ifdef CONFIG_X86
826                         load_bias = 0;
827 #else
828                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
829 #endif
830                 }
831
832                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
833                                 elf_prot, elf_flags, 0);
834                 if (BAD_ADDR(error)) {
835                         send_sig(SIGKILL, current, 0);
836                         retval = IS_ERR((void *)error) ?
837                                 PTR_ERR((void*)error) : -EINVAL;
838                         goto out_free_dentry;
839                 }
840
841                 if (!load_addr_set) {
842                         load_addr_set = 1;
843                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
844                         if (loc->elf_ex.e_type == ET_DYN) {
845                                 load_bias += error -
846                                              ELF_PAGESTART(load_bias + vaddr);
847                                 load_addr += load_bias;
848                                 reloc_func_desc = load_bias;
849                         }
850                 }
851                 k = elf_ppnt->p_vaddr;
852                 if (k < start_code)
853                         start_code = k;
854                 if (start_data < k)
855                         start_data = k;
856
857                 /*
858                  * Check to see if the section's size will overflow the
859                  * allowed task size. Note that p_filesz must always be
860                  * <= p_memsz so it is only necessary to check p_memsz.
861                  */
862                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
863                     elf_ppnt->p_memsz > TASK_SIZE ||
864                     TASK_SIZE - elf_ppnt->p_memsz < k) {
865                         /* set_brk can never work. Avoid overflows. */
866                         send_sig(SIGKILL, current, 0);
867                         retval = -EINVAL;
868                         goto out_free_dentry;
869                 }
870
871                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
872
873                 if (k > elf_bss)
874                         elf_bss = k;
875                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
876                         end_code = k;
877                 if (end_data < k)
878                         end_data = k;
879                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
880                 if (k > elf_brk)
881                         elf_brk = k;
882         }
883
884         loc->elf_ex.e_entry += load_bias;
885         elf_bss += load_bias;
886         elf_brk += load_bias;
887         start_code += load_bias;
888         end_code += load_bias;
889         start_data += load_bias;
890         end_data += load_bias;
891
892         /* Calling set_brk effectively mmaps the pages that we need
893          * for the bss and break sections.  We must do this before
894          * mapping in the interpreter, to make sure it doesn't wind
895          * up getting placed where the bss needs to go.
896          */
897         retval = set_brk(elf_bss, elf_brk);
898         if (retval) {
899                 send_sig(SIGKILL, current, 0);
900                 goto out_free_dentry;
901         }
902         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
903                 send_sig(SIGSEGV, current, 0);
904                 retval = -EFAULT; /* Nobody gets to see this, but.. */
905                 goto out_free_dentry;
906         }
907
908         if (elf_interpreter) {
909                 unsigned long uninitialized_var(interp_map_addr);
910
911                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
912                                             interpreter,
913                                             &interp_map_addr,
914                                             load_bias);
915                 if (!IS_ERR((void *)elf_entry)) {
916                         /*
917                          * load_elf_interp() returns relocation
918                          * adjustment
919                          */
920                         interp_load_addr = elf_entry;
921                         elf_entry += loc->interp_elf_ex.e_entry;
922                 }
923                 if (BAD_ADDR(elf_entry)) {
924                         force_sig(SIGSEGV, current);
925                         retval = IS_ERR((void *)elf_entry) ?
926                                         (int)elf_entry : -EINVAL;
927                         goto out_free_dentry;
928                 }
929                 reloc_func_desc = interp_load_addr;
930
931                 allow_write_access(interpreter);
932                 fput(interpreter);
933                 kfree(elf_interpreter);
934         } else {
935                 elf_entry = loc->elf_ex.e_entry;
936                 if (BAD_ADDR(elf_entry)) {
937                         force_sig(SIGSEGV, current);
938                         retval = -EINVAL;
939                         goto out_free_dentry;
940                 }
941         }
942
943         kfree(elf_phdata);
944
945         set_binfmt(&elf_format);
946
947 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
948         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
949         if (retval < 0) {
950                 send_sig(SIGKILL, current, 0);
951                 goto out;
952         }
953 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
954
955         install_exec_creds(bprm);
956         current->flags &= ~PF_FORKNOEXEC;
957         retval = create_elf_tables(bprm, &loc->elf_ex,
958                           load_addr, interp_load_addr);
959         if (retval < 0) {
960                 send_sig(SIGKILL, current, 0);
961                 goto out;
962         }
963         /* N.B. passed_fileno might not be initialized? */
964         current->mm->end_code = end_code;
965         current->mm->start_code = start_code;
966         current->mm->start_data = start_data;
967         current->mm->end_data = end_data;
968         current->mm->start_stack = bprm->p;
969
970 #ifdef arch_randomize_brk
971         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
972                 current->mm->brk = current->mm->start_brk =
973                         arch_randomize_brk(current->mm);
974 #endif
975
976         if (current->personality & MMAP_PAGE_ZERO) {
977                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
978                    and some applications "depend" upon this behavior.
979                    Since we do not have the power to recompile these, we
980                    emulate the SVr4 behavior. Sigh. */
981                 down_write(&current->mm->mmap_sem);
982                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
983                                 MAP_FIXED | MAP_PRIVATE, 0);
984                 up_write(&current->mm->mmap_sem);
985         }
986
987 #ifdef ELF_PLAT_INIT
988         /*
989          * The ABI may specify that certain registers be set up in special
990          * ways (on i386 %edx is the address of a DT_FINI function, for
991          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
992          * that the e_entry field is the address of the function descriptor
993          * for the startup routine, rather than the address of the startup
994          * routine itself.  This macro performs whatever initialization to
995          * the regs structure is required as well as any relocations to the
996          * function descriptor entries when executing dynamically links apps.
997          */
998         ELF_PLAT_INIT(regs, reloc_func_desc);
999 #endif
1000
1001         start_thread(regs, elf_entry, bprm->p);
1002         retval = 0;
1003 out:
1004         kfree(loc);
1005 out_ret:
1006         return retval;
1007
1008         /* error cleanup */
1009 out_free_dentry:
1010         allow_write_access(interpreter);
1011         if (interpreter)
1012                 fput(interpreter);
1013 out_free_interp:
1014         kfree(elf_interpreter);
1015 out_free_ph:
1016         kfree(elf_phdata);
1017         goto out;
1018 }
1019
1020 /* This is really simpleminded and specialized - we are loading an
1021    a.out library that is given an ELF header. */
1022 static int load_elf_library(struct file *file)
1023 {
1024         struct elf_phdr *elf_phdata;
1025         struct elf_phdr *eppnt;
1026         unsigned long elf_bss, bss, len;
1027         int retval, error, i, j;
1028         struct elfhdr elf_ex;
1029
1030         error = -ENOEXEC;
1031         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1032         if (retval != sizeof(elf_ex))
1033                 goto out;
1034
1035         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1036                 goto out;
1037
1038         /* First of all, some simple consistency checks */
1039         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1040             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1041                 goto out;
1042
1043         /* Now read in all of the header information */
1044
1045         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1046         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1047
1048         error = -ENOMEM;
1049         elf_phdata = kmalloc(j, GFP_KERNEL);
1050         if (!elf_phdata)
1051                 goto out;
1052
1053         eppnt = elf_phdata;
1054         error = -ENOEXEC;
1055         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1056         if (retval != j)
1057                 goto out_free_ph;
1058
1059         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1060                 if ((eppnt + i)->p_type == PT_LOAD)
1061                         j++;
1062         if (j != 1)
1063                 goto out_free_ph;
1064
1065         while (eppnt->p_type != PT_LOAD)
1066                 eppnt++;
1067
1068         /* Now use mmap to map the library into memory. */
1069         down_write(&current->mm->mmap_sem);
1070         error = do_mmap(file,
1071                         ELF_PAGESTART(eppnt->p_vaddr),
1072                         (eppnt->p_filesz +
1073                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                         PROT_READ | PROT_WRITE | PROT_EXEC,
1075                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                         (eppnt->p_offset -
1077                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078         up_write(&current->mm->mmap_sem);
1079         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1080                 goto out_free_ph;
1081
1082         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1083         if (padzero(elf_bss)) {
1084                 error = -EFAULT;
1085                 goto out_free_ph;
1086         }
1087
1088         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1089                             ELF_MIN_ALIGN - 1);
1090         bss = eppnt->p_memsz + eppnt->p_vaddr;
1091         if (bss > len) {
1092                 down_write(&current->mm->mmap_sem);
1093                 do_brk(len, bss - len);
1094                 up_write(&current->mm->mmap_sem);
1095         }
1096         error = 0;
1097
1098 out_free_ph:
1099         kfree(elf_phdata);
1100 out:
1101         return error;
1102 }
1103
1104 #ifdef CONFIG_ELF_CORE
1105 /*
1106  * ELF core dumper
1107  *
1108  * Modelled on fs/exec.c:aout_core_dump()
1109  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1110  */
1111 /*
1112  * These are the only things you should do on a core-file: use only these
1113  * functions to write out all the necessary info.
1114  */
1115 static int dump_write(struct file *file, const void *addr, int nr)
1116 {
1117         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1118 }
1119
1120 static int dump_seek(struct file *file, loff_t off)
1121 {
1122         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1123                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1124                         return 0;
1125         } else {
1126                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1127                 if (!buf)
1128                         return 0;
1129                 while (off > 0) {
1130                         unsigned long n = off;
1131                         if (n > PAGE_SIZE)
1132                                 n = PAGE_SIZE;
1133                         if (!dump_write(file, buf, n))
1134                                 return 0;
1135                         off -= n;
1136                 }
1137                 free_page((unsigned long)buf);
1138         }
1139         return 1;
1140 }
1141
1142 /*
1143  * Decide what to dump of a segment, part, all or none.
1144  */
1145 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1146                                    unsigned long mm_flags)
1147 {
1148 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1149
1150         /* The vma can be set up to tell us the answer directly.  */
1151         if (vma->vm_flags & VM_ALWAYSDUMP)
1152                 goto whole;
1153
1154         /* Hugetlb memory check */
1155         if (vma->vm_flags & VM_HUGETLB) {
1156                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1157                         goto whole;
1158                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1159                         goto whole;
1160         }
1161
1162         /* Do not dump I/O mapped devices or special mappings */
1163         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1164                 return 0;
1165
1166         /* By default, dump shared memory if mapped from an anonymous file. */
1167         if (vma->vm_flags & VM_SHARED) {
1168                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1169                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1170                         goto whole;
1171                 return 0;
1172         }
1173
1174         /* Dump segments that have been written to.  */
1175         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1176                 goto whole;
1177         if (vma->vm_file == NULL)
1178                 return 0;
1179
1180         if (FILTER(MAPPED_PRIVATE))
1181                 goto whole;
1182
1183         /*
1184          * If this looks like the beginning of a DSO or executable mapping,
1185          * check for an ELF header.  If we find one, dump the first page to
1186          * aid in determining what was mapped here.
1187          */
1188         if (FILTER(ELF_HEADERS) &&
1189             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1190                 u32 __user *header = (u32 __user *) vma->vm_start;
1191                 u32 word;
1192                 mm_segment_t fs = get_fs();
1193                 /*
1194                  * Doing it this way gets the constant folded by GCC.
1195                  */
1196                 union {
1197                         u32 cmp;
1198                         char elfmag[SELFMAG];
1199                 } magic;
1200                 BUILD_BUG_ON(SELFMAG != sizeof word);
1201                 magic.elfmag[EI_MAG0] = ELFMAG0;
1202                 magic.elfmag[EI_MAG1] = ELFMAG1;
1203                 magic.elfmag[EI_MAG2] = ELFMAG2;
1204                 magic.elfmag[EI_MAG3] = ELFMAG3;
1205                 /*
1206                  * Switch to the user "segment" for get_user(),
1207                  * then put back what elf_core_dump() had in place.
1208                  */
1209                 set_fs(USER_DS);
1210                 if (unlikely(get_user(word, header)))
1211                         word = 0;
1212                 set_fs(fs);
1213                 if (word == magic.cmp)
1214                         return PAGE_SIZE;
1215         }
1216
1217 #undef  FILTER
1218
1219         return 0;
1220
1221 whole:
1222         return vma->vm_end - vma->vm_start;
1223 }
1224
/* An ELF note in memory: describes one note before it is written out. */
struct memelfnote {
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type (NT_*) */
        unsigned int datasz;    /* size of the payload in bytes */
        void *data;             /* payload */
};
1233
1234 static int notesize(struct memelfnote *en)
1235 {
1236         int sz;
1237
1238         sz = sizeof(struct elf_note);
1239         sz += roundup(strlen(en->name) + 1, 4);
1240         sz += roundup(en->datasz, 4);
1241
1242         return sz;
1243 }
1244
1245 #define DUMP_WRITE(addr, nr, foffset)   \
1246         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1247
1248 static int alignfile(struct file *file, loff_t *foffset)
1249 {
1250         static const char buf[4] = { 0, };
1251         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1252         return 1;
1253 }
1254
1255 static int writenote(struct memelfnote *men, struct file *file,
1256                         loff_t *foffset)
1257 {
1258         struct elf_note en;
1259         en.n_namesz = strlen(men->name) + 1;
1260         en.n_descsz = men->datasz;
1261         en.n_type = men->type;
1262
1263         DUMP_WRITE(&en, sizeof(en), foffset);
1264         DUMP_WRITE(men->name, en.n_namesz, foffset);
1265         if (!alignfile(file, foffset))
1266                 return 0;
1267         DUMP_WRITE(men->data, men->datasz, foffset);
1268         if (!alignfile(file, foffset))
1269                 return 0;
1270
1271         return 1;
1272 }
1273 #undef DUMP_WRITE
1274
/*
 * Size-limited variant of DUMP_WRITE.  The expansion site must provide the
 * variables `size`, `limit` and `file` and a label `end_coredump`.
 *
 * @nr is added to `size` first; if the new total exceeds `limit`, or if
 * dump_write() fails, control jumps to end_coredump.
 *
 * NOTE(review): this expands to a bare `if` with its own trailing
 * semicolon rather than a do { } while (0) block, so it is only safe as a
 * complete statement, never as the unbraced body of an if/else.
 */
#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
1278
1279 static void fill_elf_header(struct elfhdr *elf, int segs,
1280                             u16 machine, u32 flags, u8 osabi)
1281 {
1282         memset(elf, 0, sizeof(*elf));
1283
1284         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1285         elf->e_ident[EI_CLASS] = ELF_CLASS;
1286         elf->e_ident[EI_DATA] = ELF_DATA;
1287         elf->e_ident[EI_VERSION] = EV_CURRENT;
1288         elf->e_ident[EI_OSABI] = ELF_OSABI;
1289
1290         elf->e_type = ET_CORE;
1291         elf->e_machine = machine;
1292         elf->e_version = EV_CURRENT;
1293         elf->e_phoff = sizeof(struct elfhdr);
1294         elf->e_flags = flags;
1295         elf->e_ehsize = sizeof(struct elfhdr);
1296         elf->e_phentsize = sizeof(struct elf_phdr);
1297         elf->e_phnum = segs;
1298
1299         return;
1300 }
1301
1302 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1303 {
1304         phdr->p_type = PT_NOTE;
1305         phdr->p_offset = offset;
1306         phdr->p_vaddr = 0;
1307         phdr->p_paddr = 0;
1308         phdr->p_filesz = sz;
1309         phdr->p_memsz = 0;
1310         phdr->p_flags = 0;
1311         phdr->p_align = 0;
1312         return;
1313 }
1314
1315 static void fill_note(struct memelfnote *note, const char *name, int type, 
1316                 unsigned int sz, void *data)
1317 {
1318         note->name = name;
1319         note->type = type;
1320         note->datasz = sz;
1321         note->data = data;
1322         return;
1323 }
1324
1325 /*
1326  * fill up all the fields in prstatus from the given task struct, except
1327  * registers which need to be filled up separately.
1328  */
1329 static void fill_prstatus(struct elf_prstatus *prstatus,
1330                 struct task_struct *p, long signr)
1331 {
1332         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1333         prstatus->pr_sigpend = p->pending.signal.sig[0];
1334         prstatus->pr_sighold = p->blocked.sig[0];
1335         rcu_read_lock();
1336         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1337         rcu_read_unlock();
1338         prstatus->pr_pid = task_pid_vnr(p);
1339         prstatus->pr_pgrp = task_pgrp_vnr(p);
1340         prstatus->pr_sid = task_session_vnr(p);
1341         if (thread_group_leader(p)) {
1342                 struct task_cputime cputime;
1343
1344                 /*
1345                  * This is the record for the group leader.  It shows the
1346                  * group-wide total, not its individual thread total.
1347                  */
1348                 thread_group_cputime(p, &cputime);
1349                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1350                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1351         } else {
1352                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1353                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1354         }
1355         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1356         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1357 }
1358
1359 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1360                        struct mm_struct *mm)
1361 {
1362         const struct cred *cred;
1363         unsigned int i, len;
1364         
1365         /* first copy the parameters from user space */
1366         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1367
1368         len = mm->arg_end - mm->arg_start;
1369         if (len >= ELF_PRARGSZ)
1370                 len = ELF_PRARGSZ-1;
1371         if (copy_from_user(&psinfo->pr_psargs,
1372                            (const char __user *)mm->arg_start, len))
1373                 return -EFAULT;
1374         for(i = 0; i < len; i++)
1375                 if (psinfo->pr_psargs[i] == 0)
1376                         psinfo->pr_psargs[i] = ' ';
1377         psinfo->pr_psargs[len] = 0;
1378
1379         rcu_read_lock();
1380         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1381         rcu_read_unlock();
1382         psinfo->pr_pid = task_pid_vnr(p);
1383         psinfo->pr_pgrp = task_pgrp_vnr(p);
1384         psinfo->pr_sid = task_session_vnr(p);
1385
1386         i = p->state ? ffz(~p->state) + 1 : 0;
1387         psinfo->pr_state = i;
1388         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1389         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1390         psinfo->pr_nice = task_nice(p);
1391         psinfo->pr_flag = p->flags;
1392         rcu_read_lock();
1393         cred = __task_cred(p);
1394         SET_UID(psinfo->pr_uid, cred->uid);
1395         SET_GID(psinfo->pr_gid, cred->gid);
1396         rcu_read_unlock();
1397         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1398         
1399         return 0;
1400 }
1401
1402 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1403 {
1404         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1405         int i = 0;
1406         do
1407                 i += 2;
1408         while (auxv[i - 2] != AT_NULL);
1409         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1410 }
1411
1412 #ifdef CORE_DUMP_USE_REGSET
1413 #include <linux/regset.h>
1414
/*
 * Per-thread state for the regset-based core dump path.  One entry is
 * allocated for each thread being dumped; entries are chained via @next.
 */
struct elf_thread_core_info {
        /* next entry in the singly linked list (NULL at the tail) */
        struct elf_thread_core_info *next;
        /* the thread this entry describes */
        struct task_struct *task;
        /* storage behind the NT_PRSTATUS note kept in notes[0] */
        struct elf_prstatus prstatus;
        /*
         * Trailing array sized at allocation time via
         * offsetof(..., notes[thread_notes]); slots left all-zero are
         * regsets that produced no note.
         */
        struct memelfnote notes[0];
};
1421
/* Everything gathered for the note segment of a core dump. */
struct elf_note_info {
        /* head of the per-thread list; the dumping task is kept first */
        struct elf_thread_core_info *thread;
        /* process-wide NT_PRPSINFO note */
        struct memelfnote psinfo;
        /* process-wide NT_AUXV note */
        struct memelfnote auxv;
        /* running total of notesize() over every note collected */
        size_t size;
        /* number of regsets that declare a core_note_type */
        int thread_notes;
};
1429
1430 /*
1431  * When a regset has a writeback hook, we call it on each thread before
1432  * dumping user memory.  On register window machines, this makes sure the
1433  * user memory backing the register data is up to date before we read it.
1434  */
1435 static void do_thread_regset_writeback(struct task_struct *task,
1436                                        const struct user_regset *regset)
1437 {
1438         if (regset->writeback)
1439                 regset->writeback(task, regset, 1);
1440 }
1441
1442 static int fill_thread_core_info(struct elf_thread_core_info *t,
1443                                  const struct user_regset_view *view,
1444                                  long signr, size_t *total)
1445 {
1446         unsigned int i;
1447
1448         /*
1449          * NT_PRSTATUS is the one special case, because the regset data
1450          * goes into the pr_reg field inside the note contents, rather
1451          * than being the whole note contents.  We fill the reset in here.
1452          * We assume that regset 0 is NT_PRSTATUS.
1453          */
1454         fill_prstatus(&t->prstatus, t->task, signr);
1455         (void) view->regsets[0].get(t->task, &view->regsets[0],
1456                                     0, sizeof(t->prstatus.pr_reg),
1457                                     &t->prstatus.pr_reg, NULL);
1458
1459         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1460                   sizeof(t->prstatus), &t->prstatus);
1461         *total += notesize(&t->notes[0]);
1462
1463         do_thread_regset_writeback(t->task, &view->regsets[0]);
1464
1465         /*
1466          * Each other regset might generate a note too.  For each regset
1467          * that has no core_note_type or is inactive, we leave t->notes[i]
1468          * all zero and we'll know to skip writing it later.
1469          */
1470         for (i = 1; i < view->n; ++i) {
1471                 const struct user_regset *regset = &view->regsets[i];
1472                 do_thread_regset_writeback(t->task, regset);
1473                 if (regset->core_note_type &&
1474                     (!regset->active || regset->active(t->task, regset))) {
1475                         int ret;
1476                         size_t size = regset->n * regset->size;
1477                         void *data = kmalloc(size, GFP_KERNEL);
1478                         if (unlikely(!data))
1479                                 return 0;
1480                         ret = regset->get(t->task, regset,
1481                                           0, size, data, NULL);
1482                         if (unlikely(ret))
1483                                 kfree(data);
1484                         else {
1485                                 if (regset->core_note_type != NT_PRFPREG)
1486                                         fill_note(&t->notes[i], "LINUX",
1487                                                   regset->core_note_type,
1488                                                   size, data);
1489                                 else {
1490                                         t->prstatus.pr_fpvalid = 1;
1491                                         fill_note(&t->notes[i], "CORE",
1492                                                   NT_PRFPREG, size, data);
1493                                 }
1494                                 *total += notesize(&t->notes[i]);
1495                         }
1496                 }
1497         }
1498
1499         return 1;
1500 }
1501
1502 static int fill_note_info(struct elfhdr *elf, int phdrs,
1503                           struct elf_note_info *info,
1504                           long signr, struct pt_regs *regs)
1505 {
1506         struct task_struct *dump_task = current;
1507         const struct user_regset_view *view = task_user_regset_view(dump_task);
1508         struct elf_thread_core_info *t;
1509         struct elf_prpsinfo *psinfo;
1510         struct core_thread *ct;
1511         unsigned int i;
1512
1513         info->size = 0;
1514         info->thread = NULL;
1515
1516         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1517         if (psinfo == NULL)
1518                 return 0;
1519
1520         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1521
1522         /*
1523          * Figure out how many notes we're going to need for each thread.
1524          */
1525         info->thread_notes = 0;
1526         for (i = 0; i < view->n; ++i)
1527                 if (view->regsets[i].core_note_type != 0)
1528                         ++info->thread_notes;
1529
1530         /*
1531          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1532          * since it is our one special case.
1533          */
1534         if (unlikely(info->thread_notes == 0) ||
1535             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1536                 WARN_ON(1);
1537                 return 0;
1538         }
1539
1540         /*
1541          * Initialize the ELF file header.
1542          */
1543         fill_elf_header(elf, phdrs,
1544                         view->e_machine, view->e_flags, view->ei_osabi);
1545
1546         /*
1547          * Allocate a structure for each thread.
1548          */
1549         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1550                 t = kzalloc(offsetof(struct elf_thread_core_info,
1551                                      notes[info->thread_notes]),
1552                             GFP_KERNEL);
1553                 if (unlikely(!t))
1554                         return 0;
1555
1556                 t->task = ct->task;
1557                 if (ct->task == dump_task || !info->thread) {
1558                         t->next = info->thread;
1559                         info->thread = t;
1560                 } else {
1561                         /*
1562                          * Make sure to keep the original task at
1563                          * the head of the list.
1564                          */
1565                         t->next = info->thread->next;
1566                         info->thread->next = t;
1567                 }
1568         }
1569
1570         /*
1571          * Now fill in each thread's information.
1572          */
1573         for (t = info->thread; t != NULL; t = t->next)
1574                 if (!fill_thread_core_info(t, view, signr, &info->size))
1575                         return 0;
1576
1577         /*
1578          * Fill in the two process-wide notes.
1579          */
1580         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1581         info->size += notesize(&info->psinfo);
1582
1583         fill_auxv_note(&info->auxv, current->mm);
1584         info->size += notesize(&info->auxv);
1585
1586         return 1;
1587 }
1588
1589 static size_t get_note_info_size(struct elf_note_info *info)
1590 {
1591         return info->size;
1592 }
1593
1594 /*
1595  * Write all the notes for each thread.  When writing the first thread, the
1596  * process-wide notes are interleaved after the first thread-specific note.
1597  */
1598 static int write_note_info(struct elf_note_info *info,
1599                            struct file *file, loff_t *foffset)
1600 {
1601         bool first = 1;
1602         struct elf_thread_core_info *t = info->thread;
1603
1604         do {
1605                 int i;
1606
1607                 if (!writenote(&t->notes[0], file, foffset))
1608                         return 0;
1609
1610                 if (first && !writenote(&info->psinfo, file, foffset))
1611                         return 0;
1612                 if (first && !writenote(&info->auxv, file, foffset))
1613                         return 0;
1614
1615                 for (i = 1; i < info->thread_notes; ++i)
1616                         if (t->notes[i].data &&
1617                             !writenote(&t->notes[i], file, foffset))
1618                                 return 0;
1619
1620                 first = 0;
1621                 t = t->next;
1622         } while (t);
1623
1624         return 1;
1625 }
1626
1627 static void free_note_info(struct elf_note_info *info)
1628 {
1629         struct elf_thread_core_info *threads = info->thread;
1630         while (threads) {
1631                 unsigned int i;
1632                 struct elf_thread_core_info *t = threads;
1633                 threads = t->next;
1634                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1635                 for (i = 1; i < info->thread_notes; ++i)
1636                         kfree(t->notes[i].data);
1637                 kfree(t);
1638         }
1639         kfree(info->psinfo.data);
1640 }
1641
1642 #else
1643
1644 /* Here is the structure in which status of each thread is captured. */
1645 struct elf_thread_status
1646 {
1647         struct list_head list;
1648         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1649         elf_fpregset_t fpu;             /* NT_PRFPREG */
1650         struct task_struct *thread;
1651 #ifdef ELF_CORE_COPY_XFPREGS
1652         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1653 #endif
1654         struct memelfnote notes[3];
1655         int num_notes;
1656 };
1657
1658 /*
1659  * In order to add the specific thread information for the elf file format,
1660  * we need to keep a linked list of every threads pr_status and then create
1661  * a single section for them in the final core file.
1662  */
1663 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1664 {
1665         int sz = 0;
1666         struct task_struct *p = t->thread;
1667         t->num_notes = 0;
1668
1669         fill_prstatus(&t->prstatus, p, signr);
1670         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1671         
1672         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1673                   &(t->prstatus));
1674         t->num_notes++;
1675         sz += notesize(&t->notes[0]);
1676
1677         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1678                                                                 &t->fpu))) {
1679                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1680                           &(t->fpu));
1681                 t->num_notes++;
1682                 sz += notesize(&t->notes[1]);
1683         }
1684
1685 #ifdef ELF_CORE_COPY_XFPREGS
1686         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1687                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1688                           sizeof(t->xfpu), &t->xfpu);
1689                 t->num_notes++;
1690                 sz += notesize(&t->notes[2]);
1691         }
1692 #endif  
1693         return sz;
1694 }
1695
/*
 * Everything collected for the notes segment of a core file: the notes
 * built for the current task plus the captured status of the threads on
 * thread_list.
 */
struct elf_note_info {
	struct memelfnote *notes;	/* note array; numnote entries in use */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* list of struct elf_thread_status */
	elf_fpregset_t *fpu;		/* buffer for the NT_PRFPREG note */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* buffer for the ELF_CORE_XFPREG_TYPE note */
#endif
	int thread_status_size;		/* summed sizes of the thread_list notes */
	int numnote;			/* number of valid entries in notes[] */
};
1708
1709 static int elf_note_info_init(struct elf_note_info *info)
1710 {
1711         memset(info, 0, sizeof(*info));
1712         INIT_LIST_HEAD(&info->thread_list);
1713
1714         /* Allocate space for six ELF notes */
1715         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1716         if (!info->notes)
1717                 return 0;
1718         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1719         if (!info->psinfo)
1720                 goto notes_free;
1721         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1722         if (!info->prstatus)
1723                 goto psinfo_free;
1724         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1725         if (!info->fpu)
1726                 goto prstatus_free;
1727 #ifdef ELF_CORE_COPY_XFPREGS
1728         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1729         if (!info->xfpu)
1730                 goto fpu_free;
1731 #endif
1732         return 1;
1733 #ifdef ELF_CORE_COPY_XFPREGS
1734  fpu_free:
1735         kfree(info->fpu);
1736 #endif
1737  prstatus_free:
1738         kfree(info->prstatus);
1739  psinfo_free:
1740         kfree(info->psinfo);
1741  notes_free:
1742         kfree(info->notes);
1743         return 0;
1744 }
1745
1746 static int fill_note_info(struct elfhdr *elf, int phdrs,
1747                           struct elf_note_info *info,
1748                           long signr, struct pt_regs *regs)
1749 {
1750         struct list_head *t;
1751
1752         if (!elf_note_info_init(info))
1753                 return 0;
1754
1755         if (signr) {
1756                 struct core_thread *ct;
1757                 struct elf_thread_status *ets;
1758
1759                 for (ct = current->mm->core_state->dumper.next;
1760                                                 ct; ct = ct->next) {
1761                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1762                         if (!ets)
1763                                 return 0;
1764
1765                         ets->thread = ct->task;
1766                         list_add(&ets->list, &info->thread_list);
1767                 }
1768
1769                 list_for_each(t, &info->thread_list) {
1770                         int sz;
1771
1772                         ets = list_entry(t, struct elf_thread_status, list);
1773                         sz = elf_dump_thread_status(signr, ets);
1774                         info->thread_status_size += sz;
1775                 }
1776         }
1777         /* now collect the dump for the current */
1778         memset(info->prstatus, 0, sizeof(*info->prstatus));
1779         fill_prstatus(info->prstatus, current, signr);
1780         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1781
1782         /* Set up header */
1783         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1784
1785         /*
1786          * Set up the notes in similar form to SVR4 core dumps made
1787          * with info from their /proc.
1788          */
1789
1790         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1791                   sizeof(*info->prstatus), info->prstatus);
1792         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1793         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1794                   sizeof(*info->psinfo), info->psinfo);
1795
1796         info->numnote = 2;
1797
1798         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1799
1800         /* Try to dump the FPU. */
1801         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1802                                                                info->fpu);
1803         if (info->prstatus->pr_fpvalid)
1804                 fill_note(info->notes + info->numnote++,
1805                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1806 #ifdef ELF_CORE_COPY_XFPREGS
1807         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1808                 fill_note(info->notes + info->numnote++,
1809                           "LINUX", ELF_CORE_XFPREG_TYPE,
1810                           sizeof(*info->xfpu), info->xfpu);
1811 #endif
1812
1813         return 1;
1814 }
1815
1816 static size_t get_note_info_size(struct elf_note_info *info)
1817 {
1818         int sz = 0;
1819         int i;
1820
1821         for (i = 0; i < info->numnote; i++)
1822                 sz += notesize(info->notes + i);
1823
1824         sz += info->thread_status_size;
1825
1826         return sz;
1827 }
1828
1829 static int write_note_info(struct elf_note_info *info,
1830                            struct file *file, loff_t *foffset)
1831 {
1832         int i;
1833         struct list_head *t;
1834
1835         for (i = 0; i < info->numnote; i++)
1836                 if (!writenote(info->notes + i, file, foffset))
1837                         return 0;
1838
1839         /* write out the thread status notes section */
1840         list_for_each(t, &info->thread_list) {
1841                 struct elf_thread_status *tmp =
1842                                 list_entry(t, struct elf_thread_status, list);
1843
1844                 for (i = 0; i < tmp->num_notes; i++)
1845                         if (!writenote(&tmp->notes[i], file, foffset))
1846                                 return 0;
1847         }
1848
1849         return 1;
1850 }
1851
1852 static void free_note_info(struct elf_note_info *info)
1853 {
1854         while (!list_empty(&info->thread_list)) {
1855                 struct list_head *tmp = info->thread_list.next;
1856                 list_del(tmp);
1857                 kfree(list_entry(tmp, struct elf_thread_status, list));
1858         }
1859
1860         kfree(info->prstatus);
1861         kfree(info->psinfo);
1862         kfree(info->notes);
1863         kfree(info->fpu);
1864 #ifdef ELF_CORE_COPY_XFPREGS
1865         kfree(info->xfpu);
1866 #endif
1867 }
1868
1869 #endif
1870
1871 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1872                                         struct vm_area_struct *gate_vma)
1873 {
1874         struct vm_area_struct *ret = tsk->mm->mmap;
1875
1876         if (ret)
1877                 return ret;
1878         return gate_vma;
1879 }
1880 /*
1881  * Helper function for iterating across a vma list.  It ensures that the caller
1882  * will visit `gate_vma' prior to terminating the search.
1883  */
1884 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1885                                         struct vm_area_struct *gate_vma)
1886 {
1887         struct vm_area_struct *ret;
1888
1889         ret = this_vma->vm_next;
1890         if (ret)
1891                 return ret;
1892         if (this_vma == gate_vma)
1893                 return NULL;
1894         return gate_vma;
1895 }
1896
1897 /*
1898  * Actual dumper
1899  *
1900  * This is a two-pass process; first we find the offsets of the bits,
1901  * and then they are actually written out.  If we run out of core limit
1902  * we just truncate.
1903  */
1904 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1905 {
1906         int has_dumped = 0;
1907         mm_segment_t fs;
1908         int segs;
1909         size_t size = 0;
1910         struct vm_area_struct *vma, *gate_vma;
1911         struct elfhdr *elf = NULL;
1912         loff_t offset = 0, dataoff, foffset;
1913         unsigned long mm_flags;
1914         struct elf_note_info info;
1915
1916         /*
1917          * We no longer stop all VM operations.
1918          * 
1919          * This is because those proceses that could possibly change map_count
1920          * or the mmap / vma pages are now blocked in do_exit on current
1921          * finishing this core dump.
1922          *
1923          * Only ptrace can touch these memory addresses, but it doesn't change
1924          * the map_count or the pages allocated. So no possibility of crashing
1925          * exists while dumping the mm->vm_next areas to the core file.
1926          */
1927   
1928         /* alloc memory for large data structures: too large to be on stack */
1929         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1930         if (!elf)
1931                 goto out;
1932         /*
1933          * The number of segs are recored into ELF header as 16bit value.
1934          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1935          */
1936         segs = current->mm->map_count;
1937 #ifdef ELF_CORE_EXTRA_PHDRS
1938         segs += ELF_CORE_EXTRA_PHDRS;
1939 #endif
1940
1941         gate_vma = get_gate_vma(current);
1942         if (gate_vma != NULL)
1943                 segs++;
1944
1945         /*
1946          * Collect all the non-memory information about the process for the
1947          * notes.  This also sets up the file header.
1948          */
1949         if (!fill_note_info(elf, segs + 1, /* including notes section */
1950                             &info, signr, regs))
1951                 goto cleanup;
1952
1953         has_dumped = 1;
1954         current->flags |= PF_DUMPCORE;
1955   
1956         fs = get_fs();
1957         set_fs(KERNEL_DS);
1958
1959         DUMP_WRITE(elf, sizeof(*elf));
1960         offset += sizeof(*elf);                         /* Elf header */
1961         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1962         foffset = offset;
1963
1964         /* Write notes phdr entry */
1965         {
1966                 struct elf_phdr phdr;
1967                 size_t sz = get_note_info_size(&info);
1968
1969                 sz += elf_coredump_extra_notes_size();
1970
1971                 fill_elf_note_phdr(&phdr, sz, offset);
1972                 offset += sz;
1973                 DUMP_WRITE(&phdr, sizeof(phdr));
1974         }
1975
1976         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1977
1978         /*
1979          * We must use the same mm->flags while dumping core to avoid
1980          * inconsistency between the program headers and bodies, otherwise an
1981          * unusable core file can be generated.
1982          */
1983         mm_flags = current->mm->flags;
1984
1985         /* Write program headers for segments dump */
1986         for (vma = first_vma(current, gate_vma); vma != NULL;
1987                         vma = next_vma(vma, gate_vma)) {
1988                 struct elf_phdr phdr;
1989
1990                 phdr.p_type = PT_LOAD;
1991                 phdr.p_offset = offset;
1992                 phdr.p_vaddr = vma->vm_start;
1993                 phdr.p_paddr = 0;
1994                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1995                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1996                 offset += phdr.p_filesz;
1997                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1998                 if (vma->vm_flags & VM_WRITE)
1999                         phdr.p_flags |= PF_W;
2000                 if (vma->vm_flags & VM_EXEC)
2001                         phdr.p_flags |= PF_X;
2002                 phdr.p_align = ELF_EXEC_PAGESIZE;
2003
2004                 DUMP_WRITE(&phdr, sizeof(phdr));
2005         }
2006
2007 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2008         ELF_CORE_WRITE_EXTRA_PHDRS;
2009 #endif
2010
2011         /* write out the notes section */
2012         if (!write_note_info(&info, file, &foffset))
2013                 goto end_coredump;
2014
2015         if (elf_coredump_extra_notes_write(file, &foffset))
2016                 goto end_coredump;
2017
2018         /* Align to page */
2019         if (!dump_seek(file, dataoff - foffset))
2020                 goto end_coredump;
2021
2022         for (vma = first_vma(current, gate_vma); vma != NULL;
2023                         vma = next_vma(vma, gate_vma)) {
2024                 unsigned long addr;
2025                 unsigned long end;
2026
2027                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2028
2029                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2030                         struct page *page;
2031                         int stop;
2032
2033                         page = get_dump_page(addr);
2034                         if (page) {
2035                                 void *kaddr = kmap(page);
2036                                 stop = ((size += PAGE_SIZE) > limit) ||
2037                                         !dump_write(file, kaddr, PAGE_SIZE);
2038                                 kunmap(page);
2039                                 page_cache_release(page);
2040                         } else
2041                                 stop = !dump_seek(file, PAGE_SIZE);
2042                         if (stop)
2043                                 goto end_coredump;
2044                 }
2045         }
2046
2047 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2048         ELF_CORE_WRITE_EXTRA_DATA;
2049 #endif
2050
2051 end_coredump:
2052         set_fs(fs);
2053
2054 cleanup:
2055         free_note_info(&info);
2056         kfree(elf);
2057 out:
2058         return has_dumped;
2059 }
2060
2061 #endif          /* CONFIG_ELF_CORE */
2062
2063 static int __init init_elf_binfmt(void)
2064 {
2065         return register_binfmt(&elf_format);
2066 }
2067
/* Undo init_elf_binfmt(): unregister the ELF handler. */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader; elf_format is the only format unregistered here. */
	unregister_binfmt(&elf_format);
}
2073
/*
 * Hook the init and exit routines above into the kernel: init_elf_binfmt is
 * passed to core_initcall(), exit_elf_binfmt to module_exit().
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");