/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
        .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

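/*
 * set_brk() maps anonymous, zeroed pages over [start, end) via
 * do_brk() and records the new program break.  Both bounds are
 * rounded up to ELF page alignment first.
 */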
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

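/*
 * create_elf_tables() lays out the new program's initial stack:
 * the platform capability strings, 16 random bytes for AT_RANDOM,
 * the ELF auxiliary vector, and argc followed by the argv and envp
 * pointer arrays.
 */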
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, cred->uid);
        NEW_AUX_ENT(AT_EUID, cred->euid);
        NEW_AUX_ENT(AT_GID, cred->gid);
        NEW_AUX_ENT(AT_EGID, cred->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

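/*
 * Default mapping of one PT_LOAD segment of @filep at @addr.  When
 * @total_size is non-zero this is the image's first mapping: the whole
 * page-aligned span is reserved in one mmap and the excess beyond this
 * segment is unmapped again, so base randomization cannot leave later
 * segments overlapping something else.
 */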
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

#endif /* !elf_map */

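/*
 * Span, in bytes, from the page-aligned start of the first PT_LOAD
 * segment to the end of the last one; returns 0 if there are no
 * PT_LOAD segments.
 */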
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

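/*
 * With PF_RANDOMIZE set (and ADDR_NO_RANDOMIZE clear), shift the stack
 * top by a random, page-aligned amount bounded by STACK_RND_MASK:
 * downwards normally, upwards on CONFIG_STACK_GROWSUP.
 */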
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

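/*
 * The main loader: validate the ELF header, read the program header
 * table, load any PT_INTERP interpreter, flush the old executable
 * (the point of no return), map each PT_LOAD segment, set up the
 * bss/brk, build the stack tables and hand control to the entry point.
 */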
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

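        /*
         * Fold the final load bias into every recorded address; it is
         * non-zero only for ET_DYN objects that were mapped away from
         * their nominal virtual addresses.
         */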
        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

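/*
 * Advance the core file position by @off bytes: seek if the file
 * supports it, otherwise write that many zero bytes by hand.
 */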
static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zero page on a short write */
                                free_page((unsigned long)buf);
                                return 0;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

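/*
 * On-disk size of a note: the fixed header plus the name and the
 * payload, each padded to a 4-byte boundary.
 */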
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;

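/*
 * Fill in the fixed fields of an ET_CORE ELF header that describes
 * @segs program headers.
 */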
static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

1372 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1373                        struct mm_struct *mm)
1374 {
1375         const struct cred *cred;
1376         unsigned int i, len;
1377         
1378         /* first copy the parameters from user space */
1379         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1380
1381         len = mm->arg_end - mm->arg_start;
1382         if (len >= ELF_PRARGSZ)
1383                 len = ELF_PRARGSZ-1;
1384         if (copy_from_user(&psinfo->pr_psargs,
1385                            (const char __user *)mm->arg_start, len))
1386                 return -EFAULT;
1387         for(i = 0; i < len; i++)
1388                 if (psinfo->pr_psargs[i] == 0)
1389                         psinfo->pr_psargs[i] = ' ';
1390         psinfo->pr_psargs[len] = 0;
1391
1392         psinfo->pr_pid = task_pid_vnr(p);
1393         psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1394         psinfo->pr_pgrp = task_pgrp_vnr(p);
1395         psinfo->pr_sid = task_session_vnr(p);
1396
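             /*
              * Map the lowest set state bit to the one-letter codes used
              * by ps(1); any state beyond 'W' is shown as '.'.
              */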
1397         i = p->state ? ffz(~p->state) + 1 : 0;
1398         psinfo->pr_state = i;
1399         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1400         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1401         psinfo->pr_nice = task_nice(p);
1402         psinfo->pr_flag = p->flags;
1403         rcu_read_lock();
1404         cred = __task_cred(p);
1405         SET_UID(psinfo->pr_uid, cred->uid);
1406         SET_GID(psinfo->pr_gid, cred->gid);
1407         rcu_read_unlock();
1408         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1409         
1410         return 0;
1411 }
1412
1413 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1414 {
1415         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1416         int i = 0;
1417         do
1418                 i += 2;
1419         while (auxv[i - 2] != AT_NULL);
1420         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1421 }
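
     /*
      * For example (illustrative values), a saved_auxv of
      *     { AT_PHDR, 0x400040, AT_ENTRY, 0x4004d0, AT_NULL, 0 }
      * leaves the loop above with i == 6, so the note payload covers all
      * three pairs, including the terminating AT_NULL entry.
      */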
1422
1423 #ifdef CORE_DUMP_USE_REGSET
1424 #include <linux/regset.h>
1425
1426 struct elf_thread_core_info {
1427         struct elf_thread_core_info *next;
1428         struct task_struct *task;
1429         struct elf_prstatus prstatus;
1430         struct memelfnote notes[0];
1431 };
1432
1433 struct elf_note_info {
1434         struct elf_thread_core_info *thread;
1435         struct memelfnote psinfo;
1436         struct memelfnote auxv;
1437         size_t size;
1438         int thread_notes;
1439 };
1440
1441 /*
1442  * When a regset has a writeback hook, we call it on each thread before
1443  * dumping user memory.  On register window machines, this makes sure the
1444  * user memory backing the register data is up to date before we read it.
1445  */
1446 static void do_thread_regset_writeback(struct task_struct *task,
1447                                        const struct user_regset *regset)
1448 {
1449         if (regset->writeback)
1450                 regset->writeback(task, regset, 1);
1451 }
1452
1453 static int fill_thread_core_info(struct elf_thread_core_info *t,
1454                                  const struct user_regset_view *view,
1455                                  long signr, size_t *total)
1456 {
1457         unsigned int i;
1458
1459         /*
1460          * NT_PRSTATUS is the one special case, because the regset data
1461          * goes into the pr_reg field inside the note contents, rather
1462          * than being the whole note contents.  We fill the rest in here.
1463          * We assume that regset 0 is NT_PRSTATUS.
1464          */
1465         fill_prstatus(&t->prstatus, t->task, signr);
1466         (void) view->regsets[0].get(t->task, &view->regsets[0],
1467                                     0, sizeof(t->prstatus.pr_reg),
1468                                     &t->prstatus.pr_reg, NULL);
1469
1470         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1471                   sizeof(t->prstatus), &t->prstatus);
1472         *total += notesize(&t->notes[0]);
1473
1474         do_thread_regset_writeback(t->task, &view->regsets[0]);
1475
1476         /*
1477          * Each other regset might generate a note too.  For each regset
1478          * that has no core_note_type or is inactive, we leave t->notes[i]
1479          * all zero and we'll know to skip writing it later.
1480          */
1481         for (i = 1; i < view->n; ++i) {
1482                 const struct user_regset *regset = &view->regsets[i];
1483                 do_thread_regset_writeback(t->task, regset);
1484                 if (regset->core_note_type &&
1485                     (!regset->active || regset->active(t->task, regset))) {
1486                         int ret;
1487                         size_t size = regset->n * regset->size;
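                             /*
                              * The buffer becomes the note's payload and
                              * is freed later in free_note_info().
                              */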
1488                         void *data = kmalloc(size, GFP_KERNEL);
1489                         if (unlikely(!data))
1490                                 return 0;
1491                         ret = regset->get(t->task, regset,
1492                                           0, size, data, NULL);
1493                         if (unlikely(ret)) {
1494                                 kfree(data);
1495                         } else {
1496                                 if (regset->core_note_type != NT_PRFPREG) {
1497                                         fill_note(&t->notes[i], "LINUX",
1498                                                   regset->core_note_type,
1499                                                   size, data);
1500                                 } else {
1501                                         t->prstatus.pr_fpvalid = 1;
1502                                         fill_note(&t->notes[i], "CORE",
1503                                                   NT_PRFPREG, size, data);
1504                                 }
1505                                 *total += notesize(&t->notes[i]);
1506                         }
1505                                 *total += notesize(&t->notes[i]);
1506                         }
1507                 }
1508         }
1509
1510         return 1;
1511 }
1512
1513 static int fill_note_info(struct elfhdr *elf, int phdrs,
1514                           struct elf_note_info *info,
1515                           long signr, struct pt_regs *regs)
1516 {
1517         struct task_struct *dump_task = current;
1518         const struct user_regset_view *view = task_user_regset_view(dump_task);
1519         struct elf_thread_core_info *t;
1520         struct elf_prpsinfo *psinfo;
1521         struct core_thread *ct;
1522         unsigned int i;
1523
1524         info->size = 0;
1525         info->thread = NULL;
1526
1527         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1528         if (psinfo == NULL)
1529                 return 0;
1530
1531         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1532
1533         /*
1534          * Figure out how many notes we're going to need for each thread.
1535          */
1536         info->thread_notes = 0;
1537         for (i = 0; i < view->n; ++i)
1538                 if (view->regsets[i].core_note_type != 0)
1539                         ++info->thread_notes;
1540
1541         /*
1542          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1543          * since it is our one special case.
1544          */
1545         if (unlikely(info->thread_notes == 0) ||
1546             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1547                 WARN_ON(1);
1548                 return 0;
1549         }
1550
1551         /*
1552          * Initialize the ELF file header.
1553          */
1554         fill_elf_header(elf, phdrs,
1555                         view->e_machine, view->e_flags, view->ei_osabi);
1556
1557         /*
1558          * Allocate a structure for each thread.
1559          */
1560         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1561                 t = kzalloc(offsetof(struct elf_thread_core_info,
1562                                      notes[info->thread_notes]),
1563                             GFP_KERNEL);
1564                 if (unlikely(!t))
1565                         return 0;
1566
1567                 t->task = ct->task;
1568                 if (ct->task == dump_task || !info->thread) {
1569                         t->next = info->thread;
1570                         info->thread = t;
1571                 } else {
1572                         /*
1573                          * Make sure to keep the original task at
1574                          * the head of the list.
1575                          */
1576                         t->next = info->thread->next;
1577                         info->thread->next = t;
1578                 }
1579         }
1580
1581         /*
1582          * Now fill in each thread's information.
1583          */
1584         for (t = info->thread; t != NULL; t = t->next)
1585                 if (!fill_thread_core_info(t, view, signr, &info->size))
1586                         return 0;
1587
1588         /*
1589          * Fill in the two process-wide notes.
1590          */
1591         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1592         info->size += notesize(&info->psinfo);
1593
1594         fill_auxv_note(&info->auxv, current->mm);
1595         info->size += notesize(&info->auxv);
1596
1597         return 1;
1598 }
1599
1600 static size_t get_note_info_size(struct elf_note_info *info)
1601 {
1602         return info->size;
1603 }
1604
1605 /*
1606  * Write all the notes for each thread.  When writing the first thread, the
1607  * process-wide notes are interleaved after the first thread-specific note.
1608  */
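     /*
      * Illustrative ordering for a two-thread dump (T0 is the dumping
      * thread), assuming one extra regset note per thread:
      *
      *     T0 NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, T0 extra note,
      *     T1 NT_PRSTATUS, T1 extra note
      */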
1609 static int write_note_info(struct elf_note_info *info,
1610                            struct file *file, loff_t *foffset)
1611 {
1612         bool first = true;
1613         struct elf_thread_core_info *t = info->thread;
1614
1615         do {
1616                 int i;
1617
1618                 if (!writenote(&t->notes[0], file, foffset))
1619                         return 0;
1620
1621                 if (first && !writenote(&info->psinfo, file, foffset))
1622                         return 0;
1623                 if (first && !writenote(&info->auxv, file, foffset))
1624                         return 0;
1625
1626                 for (i = 1; i < info->thread_notes; ++i)
1627                         if (t->notes[i].data &&
1628                             !writenote(&t->notes[i], file, foffset))
1629                                 return 0;
1630
1631                 first = false;
1632                 t = t->next;
1633         } while (t);
1634
1635         return 1;
1636 }
1637
1638 static void free_note_info(struct elf_note_info *info)
1639 {
1640         struct elf_thread_core_info *threads = info->thread;
1641         while (threads) {
1642                 unsigned int i;
1643                 struct elf_thread_core_info *t = threads;
1644                 threads = t->next;
1645                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1646                 for (i = 1; i < info->thread_notes; ++i)
1647                         kfree(t->notes[i].data);
1648                 kfree(t);
1649         }
1650         kfree(info->psinfo.data);
1651 }
1652
1653 #else
1654
1655 /* Here is the structure in which the status of each thread is captured. */
1656 struct elf_thread_status
1657 {
1658         struct list_head list;
1659         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1660         elf_fpregset_t fpu;             /* NT_PRFPREG */
1661         struct task_struct *thread;
1662 #ifdef ELF_CORE_COPY_XFPREGS
1663         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1664 #endif
1665         struct memelfnote notes[3];
1666         int num_notes;
1667 };
1668
1669 /*
1670  * In order to add the specific thread information for the elf file format,
1671  * we need to keep a linked list of every thread's pr_status and then create
1672  * a single section for them in the final core file.
1673  */
1674 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1675 {
1676         int sz = 0;
1677         struct task_struct *p = t->thread;
1678         t->num_notes = 0;
1679
1680         fill_prstatus(&t->prstatus, p, signr);
1681         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1682         
1683         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1684                   &(t->prstatus));
1685         t->num_notes++;
1686         sz += notesize(&t->notes[0]);
1687
1688         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1689                                                                 &t->fpu))) {
1690                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1691                           &(t->fpu));
1692                 t->num_notes++;
1693                 sz += notesize(&t->notes[1]);
1694         }
1695
1696 #ifdef ELF_CORE_COPY_XFPREGS
1697         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1698                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1699                           sizeof(t->xfpu), &t->xfpu);
1700                 t->num_notes++;
1701                 sz += notesize(&t->notes[2]);
1702         }
1703 #endif  
1704         return sz;
1705 }
1706
1707 struct elf_note_info {
1708         struct memelfnote *notes;
1709         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1710         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1711         struct list_head thread_list;
1712         elf_fpregset_t *fpu;
1713 #ifdef ELF_CORE_COPY_XFPREGS
1714         elf_fpxregset_t *xfpu;
1715 #endif
1716         int thread_status_size;
1717         int numnote;
1718 };
1719
1720 static int fill_note_info(struct elfhdr *elf, int phdrs,
1721                           struct elf_note_info *info,
1722                           long signr, struct pt_regs *regs)
1723 {
1724 #define NUM_NOTES       6
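             /*
              * prstatus, psinfo, auxv, fpregs and (optionally) xfpregs:
              * at most five of these slots are filled below, so six
              * leaves headroom.
              */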
1725         struct list_head *t;
1726
1727         info->notes = NULL;
1728         info->prstatus = NULL;
1729         info->psinfo = NULL;
1730         info->fpu = NULL;
1731 #ifdef ELF_CORE_COPY_XFPREGS
1732         info->xfpu = NULL;
1733 #endif
1734         INIT_LIST_HEAD(&info->thread_list);
1735
1736         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1737                               GFP_KERNEL);
1738         if (!info->notes)
1739                 return 0;
1740         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1741         if (!info->psinfo)
1742                 return 0;
1743         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1744         if (!info->prstatus)
1745                 return 0;
1746         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1747         if (!info->fpu)
1748                 return 0;
1749 #ifdef ELF_CORE_COPY_XFPREGS
1750         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1751         if (!info->xfpu)
1752                 return 0;
1753 #endif
1754
1755         info->thread_status_size = 0;
1756         if (signr) {
1757                 struct core_thread *ct;
1758                 struct elf_thread_status *ets;
1759
1760                 for (ct = current->mm->core_state->dumper.next;
1761                                                 ct; ct = ct->next) {
1762                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1763                         if (!ets)
1764                                 return 0;
1765
1766                         ets->thread = ct->task;
1767                         list_add(&ets->list, &info->thread_list);
1768                 }
1769
1770                 list_for_each(t, &info->thread_list) {
1771                         int sz;
1772
1773                         ets = list_entry(t, struct elf_thread_status, list);
1774                         sz = elf_dump_thread_status(signr, ets);
1775                         info->thread_status_size += sz;
1776                 }
1777         }
1778         /* Now collect the dump for the current task. */
1779         memset(info->prstatus, 0, sizeof(*info->prstatus));
1780         fill_prstatus(info->prstatus, current, signr);
1781         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1782
1783         /* Set up header */
1784         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1785
1786         /*
1787          * Set up the notes in similar form to SVR4 core dumps made
1788          * with info from their /proc.
1789          */
1790
1791         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1792                   sizeof(*info->prstatus), info->prstatus);
1793         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1794         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1795                   sizeof(*info->psinfo), info->psinfo);
1796
1797         info->numnote = 2;
1798
1799         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1800
1801         /* Try to dump the FPU. */
1802         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1803                                                                info->fpu);
1804         if (info->prstatus->pr_fpvalid)
1805                 fill_note(info->notes + info->numnote++,
1806                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1807 #ifdef ELF_CORE_COPY_XFPREGS
1808         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1809                 fill_note(info->notes + info->numnote++,
1810                           "LINUX", ELF_CORE_XFPREG_TYPE,
1811                           sizeof(*info->xfpu), info->xfpu);
1812 #endif
1813
1814         return 1;
1815
1816 #undef NUM_NOTES
1817 }
1818
1819 static size_t get_note_info_size(struct elf_note_info *info)
1820 {
1821         int sz = 0;
1822         int i;
1823
1824         for (i = 0; i < info->numnote; i++)
1825                 sz += notesize(info->notes + i);
1826
1827         sz += info->thread_status_size;
1828
1829         return sz;
1830 }
1831
1832 static int write_note_info(struct elf_note_info *info,
1833                            struct file *file, loff_t *foffset)
1834 {
1835         int i;
1836         struct list_head *t;
1837
1838         for (i = 0; i < info->numnote; i++)
1839                 if (!writenote(info->notes + i, file, foffset))
1840                         return 0;
1841
1842         /* write out the thread status notes section */
1843         list_for_each(t, &info->thread_list) {
1844                 struct elf_thread_status *tmp =
1845                                 list_entry(t, struct elf_thread_status, list);
1846
1847                 for (i = 0; i < tmp->num_notes; i++)
1848                         if (!writenote(&tmp->notes[i], file, foffset))
1849                                 return 0;
1850         }
1851
1852         return 1;
1853 }
1854
1855 static void free_note_info(struct elf_note_info *info)
1856 {
1857         while (!list_empty(&info->thread_list)) {
1858                 struct list_head *tmp = info->thread_list.next;
1859                 list_del(tmp);
1860                 kfree(list_entry(tmp, struct elf_thread_status, list));
1861         }
1862
1863         kfree(info->prstatus);
1864         kfree(info->psinfo);
1865         kfree(info->notes);
1866         kfree(info->fpu);
1867 #ifdef ELF_CORE_COPY_XFPREGS
1868         kfree(info->xfpu);
1869 #endif
1870 }
1871
1872 #endif
1873
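     /*
      * The first vma to dump: the task's own mappings when it has any,
      * otherwise the gate vma alone.
      */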
1874 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1875                                         struct vm_area_struct *gate_vma)
1876 {
1877         struct vm_area_struct *ret = tsk->mm->mmap;
1878
1879         if (ret)
1880                 return ret;
1881         return gate_vma;
1882 }
1883 /*
1884  * Helper function for iterating across a vma list.  It ensures that the caller
1885  * will visit `gate_vma' prior to terminating the search.
1886  */
1887 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1888                                         struct vm_area_struct *gate_vma)
1889 {
1890         struct vm_area_struct *ret;
1891
1892         ret = this_vma->vm_next;
1893         if (ret)
1894                 return ret;
1895         if (this_vma == gate_vma)
1896                 return NULL;
1897         return gate_vma;
1898 }
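
     /*
      * Together these produce the dump loop used below, e.g.:
      *
      *     for (vma = first_vma(current, gate_vma); vma != NULL;
      *                     vma = next_vma(vma, gate_vma))
      *             ...
      */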
1899
1900 /*
1901  * Actual dumper
1902  *
1903  * This is a two-pass process; first we find the offsets of the bits,
1904  * and then they are actually written out.  If we run out of core limit
1905  * we just truncate.
1906  */
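     /*
      * Resulting file layout, as computed by the first pass:
      *
      *     ELF header
      *     program headers (one PT_NOTE, one PT_LOAD per vma)
      *     note data
      *     padding up to ELF_EXEC_PAGESIZE
      *     page-aligned segment data, one run per vma
      */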
1907 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1908 {
1909         int has_dumped = 0;
1910         mm_segment_t fs;
1911         int segs;
1912         size_t size = 0;
1913         struct vm_area_struct *vma, *gate_vma;
1914         struct elfhdr *elf = NULL;
1915         loff_t offset = 0, dataoff, foffset;
1916         unsigned long mm_flags;
1917         struct elf_note_info info;
1918
1919         /*
1920          * We no longer stop all VM operations.
1921          * 
1922          * This is because any process that could possibly change map_count
1923          * or the mmap / vma pages is now blocked in do_exit until current
1924          * finishes this core dump.
1925          *
1926          * Only ptrace can touch these memory addresses, but it doesn't change
1927          * the map_count or the pages allocated. So no possibility of crashing
1928          * exists while dumping the mm->vm_next areas to the core file.
1929          */
1930   
1931         /* alloc memory for large data structures: too large to be on stack */
1932         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1933         if (!elf)
1934                 goto out;
1935         
1936         segs = current->mm->map_count;
1937 #ifdef ELF_CORE_EXTRA_PHDRS
1938         segs += ELF_CORE_EXTRA_PHDRS;
1939 #endif
1940
1941         gate_vma = get_gate_vma(current);
1942         if (gate_vma != NULL)
1943                 segs++;
1944
1945         /*
1946          * Collect all the non-memory information about the process for the
1947          * notes.  This also sets up the file header.
1948          */
1949         if (!fill_note_info(elf, segs + 1, /* including notes section */
1950                             &info, signr, regs))
1951                 goto cleanup;
1952
1953         has_dumped = 1;
1954         current->flags |= PF_DUMPCORE;
1955   
1956         fs = get_fs();
1957         set_fs(KERNEL_DS);
1958
1959         DUMP_WRITE(elf, sizeof(*elf));
1960         offset += sizeof(*elf);                         /* Elf header */
1961         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1962         foffset = offset;
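             /*
              * From here on, offset is the precomputed layout position
              * while foffset tracks what has actually been written, so
              * the difference can be padded out before the segment data.
              */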
1963
1964         /* Write notes phdr entry */
1965         {
1966                 struct elf_phdr phdr;
1967                 size_t sz = get_note_info_size(&info);
1968
1969                 sz += elf_coredump_extra_notes_size();
1970
1971                 fill_elf_note_phdr(&phdr, sz, offset);
1972                 offset += sz;
1973                 DUMP_WRITE(&phdr, sizeof(phdr));
1974         }
1975
1976         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1977
1978         /*
1979          * We must use the same mm->flags while dumping core to avoid
1980          * inconsistency between the program headers and bodies, otherwise an
1981          * unusable core file can be generated.
1982          */
1983         mm_flags = current->mm->flags;
1984
1985         /* Write program headers for segments dump */
1986         for (vma = first_vma(current, gate_vma); vma != NULL;
1987                         vma = next_vma(vma, gate_vma)) {
1988                 struct elf_phdr phdr;
1989
1990                 phdr.p_type = PT_LOAD;
1991                 phdr.p_offset = offset;
1992                 phdr.p_vaddr = vma->vm_start;
1993                 phdr.p_paddr = 0;
1994                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1995                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1996                 offset += phdr.p_filesz;
1997                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1998                 if (vma->vm_flags & VM_WRITE)
1999                         phdr.p_flags |= PF_W;
2000                 if (vma->vm_flags & VM_EXEC)
2001                         phdr.p_flags |= PF_X;
2002                 phdr.p_align = ELF_EXEC_PAGESIZE;
2003
2004                 DUMP_WRITE(&phdr, sizeof(phdr));
2005         }
2006
2007 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2008         ELF_CORE_WRITE_EXTRA_PHDRS;
2009 #endif
2010
2011         /* write out the notes section */
2012         if (!write_note_info(&info, file, &foffset))
2013                 goto end_coredump;
2014
2015         if (elf_coredump_extra_notes_write(file, &foffset))
2016                 goto end_coredump;
2017
2018         /* Align to page */
2019         DUMP_SEEK(dataoff - foffset);
2020
2021         for (vma = first_vma(current, gate_vma); vma != NULL;
2022                         vma = next_vma(vma, gate_vma)) {
2023                 unsigned long addr;
2024                 unsigned long end;
2025
2026                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2027
2028                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2029                         struct page *page;
2030                         struct vm_area_struct *tmp_vma;
2031
2032                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2033                                                 &page, &tmp_vma) <= 0) {
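                                     /* Page unavailable: leave a hole. */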
2034                                 DUMP_SEEK(PAGE_SIZE);
2035                         } else {
2036                                 if (page == ZERO_PAGE(0)) {
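                                             /*
                                              * Untouched memory maps the
                                              * shared zero page: no need
                                              * to write its contents,
                                              * just skip ahead.
                                              */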
2037                                         if (!dump_seek(file, PAGE_SIZE)) {
2038                                                 page_cache_release(page);
2039                                                 goto end_coredump;
2040                                         }
2041                                 } else {
2042                                         void *kaddr;
2043                                         flush_cache_page(tmp_vma, addr,
2044                                                          page_to_pfn(page));
2045                                         kaddr = kmap(page);
2046                                         if ((size += PAGE_SIZE) > limit ||
2047                                             !dump_write(file, kaddr,
2048                                             PAGE_SIZE)) {
2049                                                 kunmap(page);
2050                                                 page_cache_release(page);
2051                                                 goto end_coredump;
2052                                         }
2053                                         kunmap(page);
2054                                 }
2055                                 page_cache_release(page);
2056                         }
2057                 }
2058         }
2059
2060 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2061         ELF_CORE_WRITE_EXTRA_DATA;
2062 #endif
2063
2064 end_coredump:
2065         set_fs(fs);
2066
2067 cleanup:
2068         free_note_info(&info);
2069         kfree(elf);
2070 out:
2071         return has_dumped;
2072 }
2073
2074 #endif          /* USE_ELF_CORE_DUMP */
2075
2076 static int __init init_elf_binfmt(void)
2077 {
2078         return register_binfmt(&elf_format);
2079 }
2080
2081 static void __exit exit_elf_binfmt(void)
2082 {
2083         /* Remove the ELF loader. */
2084         unregister_binfmt(&elf_format);
2085 }
2086
2087 core_initcall(init_elf_binfmt);
2088 module_exit(exit_elf_binfmt);
2089 MODULE_LICENSE("GPL");