X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fexec.c;h=e19de6a80339b3ceeae267283962f271ac6c5e55;hb=1da083c9b23dafd6bcb08dcfec443e66e90efff0;hp=a2e6989dbc3a42ae3aae845592bcd2087cc57bc6;hpb=8c652f96d3852b97a49c331cd0bb02d22f3cb31b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/exec.c b/fs/exec.c index a2e6989..e19de6a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; +unsigned int core_pipe_limit; int suid_dumpable = 0; /* The maximal length of core_pattern is also specified in sysctl.c */ @@ -69,17 +71,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { @@ -104,40 +107,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = inode_permission(nd.path.dentry->d_inode, - MAY_READ | MAY_EXEC | MAY_OPEN); - if (error) - goto exit; - error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN); - if (error) - goto exit; - - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -159,13 +150,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -207,7 +195,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > rlim[RLIMIT_STACK].rlim_cur / 4) { + if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { put_page(page); return NULL; } @@ -254,10 +242,12 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ + BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS; + vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) goto err; @@ -528,7 +518,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) /* * cover the whole range: [new_start, old_end) */ - vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL); + if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL)) + return -ENOMEM; /* * move the page tables downwards, on failure we rely on @@ -559,15 +550,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) tlb_finish_mmu(tlb, new_end, old_end); /* - * shrink the vma to just the new range. + * Shrink the vma to just the new range. Always succeeds. */ vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL); return 0; } -#define EXTRA_STACK_VM_PAGES 20 /* random */ - /* * Finalizes the stack vm_area_struct. The flags and permissions are updated, * the stack is optionally relocated, and some extra space is added. @@ -583,10 +572,13 @@ int setup_arg_pages(struct linux_binprm *bprm, struct vm_area_struct *prev = NULL; unsigned long vm_flags; unsigned long stack_base; + unsigned long stack_size; + unsigned long stack_expand; + unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size to 1GB */ - stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; + stack_base = rlimit_max(RLIMIT_STACK); if (stack_base > (1 << 30)) stack_base = 1 << 30; @@ -625,6 +617,7 @@ int setup_arg_pages(struct linux_binprm *bprm, else if (executable_stack == EXSTACK_DISABLE_X) vm_flags &= ~VM_EXEC; vm_flags |= mm->def_flags; + vm_flags |= VM_STACK_INCOMPLETE_SETUP; ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, vm_flags); @@ -635,16 +628,30 @@ int setup_arg_pages(struct linux_binprm *bprm, /* Move stack pages down in memory. */ if (stack_shift) { ret = shift_arg_pages(vma, stack_shift); - if (ret) { - up_write(&mm->mmap_sem); - return ret; - } + if (ret) + goto out_unlock; } + /* mprotect_fixup is overkill to remove the temporary stack flags */ + vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; + + stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ + stack_size = vma->vm_end - vma->vm_start; + /* + * Align this down to a page boundary as expand_stack + * will align it up. + */ + rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; #ifdef CONFIG_STACK_GROWSUP - stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_start + rlim_stack; + else + stack_base = vma->vm_end + stack_expand; #else - stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_end - rlim_stack; + else + stack_base = vma->vm_start - stack_expand; #endif ret = expand_stack(vma, stack_base); if (ret) @@ -652,7 +659,7 @@ int setup_arg_pages(struct linux_binprm *bprm, out_unlock: up_write(&mm->mmap_sem); - return 0; + return ret; } EXPORT_SYMBOL(setup_arg_pages); @@ -660,53 +667,39 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); - if (err) - goto out_path_put; - err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); -int kernel_read(struct file *file, unsigned long offset, - char *addr, unsigned long count) +int kernel_read(struct file *file, loff_t offset, + char *addr, unsigned long count) { mm_segment_t old_fs; loff_t pos = offset; @@ -730,6 +723,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; + sync_mm_rss(tsk, old_mm); mm_release(tsk, old_mm); if (old_mm) { @@ -774,7 +768,6 @@ static int de_thread(struct task_struct *tsk) struct signal_struct *sig = tsk->signal; struct sighand_struct *oldsighand = tsk->sighand; spinlock_t *lock = &oldsighand->siglock; - int count; if (thread_group_empty(tsk)) goto no_thread_group; @@ -791,13 +784,13 @@ static int de_thread(struct task_struct *tsk) spin_unlock_irq(lock); return -EAGAIN; } + sig->group_exit_task = tsk; - zap_other_threads(tsk); + sig->notify_count = zap_other_threads(tsk); + if (!thread_group_leader(tsk)) + sig->notify_count--; - /* Account for the thread group leader hanging around: */ - count = thread_group_leader(tsk) ? 1 : 2; - sig->notify_count = count; - while (atomic_read(&sig->count) > count) { + while (sig->notify_count) { __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(lock); schedule(); @@ -854,7 +847,9 @@ static int de_thread(struct task_struct *tsk) attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); + list_replace_rcu(&leader->tasks, &tsk->tasks); + list_replace_init(&leader->sibling, &tsk->sibling); tsk->group_leader = tsk; leader->group_leader = tsk; @@ -872,6 +867,9 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: + if (current->mm) + setmax_mm_hiwater_rss(&sig->maxrss, current->mm); + exit_itimers(sig); flush_itimer_signals(); @@ -948,15 +946,23 @@ char *get_task_comm(char *buf, struct task_struct *tsk) void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); + + /* + * Threads may access current->comm without holding + * the task lock, so write the string carefully. + * Readers without a lock may see incomplete new + * names but are safe from non-terminating string reads. + */ + memset(tsk->comm, 0, TASK_COMM_LEN); + wmb(); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); + perf_event_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -977,6 +983,25 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ + current->flags &= ~PF_RANDOMIZE; + flush_thread(); + current->personality &= ~bprm->per_clear; + + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -998,9 +1023,6 @@ int flush_old_exec(struct linux_binprm * bprm) tcomm[i] = '\0'; set_task_comm(current, tcomm); - current->flags &= ~PF_RANDOMIZE; - flush_thread(); - /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc @@ -1016,7 +1038,12 @@ int flush_old_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - current->personality &= ~bprm->per_clear; + /* + * Flush performance counters when crossing a + * security domain: + */ + if (!get_dumpable(current->mm)) + perf_event_exit_task(current); /* An exec changes our domain. We are no longer part of the thread group */ @@ -1025,14 +1052,37 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); +} +EXPORT_SYMBOL(setup_new_exec); - return 0; +/* + * Prepare credentials and lock ->cred_guard_mutex. + * install_exec_creds() commits the new creds and drops the lock. + * Or, if exec fails before, free_bprm() should release ->cred and + * and unlock. + */ +int prepare_bprm_creds(struct linux_binprm *bprm) +{ + if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + return -ERESTARTNOINTR; -out: - return retval; + bprm->cred = prepare_exec_creds(); + if (likely(bprm->cred)) + return 0; + + mutex_unlock(¤t->cred_guard_mutex); + return -ENOMEM; } -EXPORT_SYMBOL(flush_old_exec); +void free_bprm(struct linux_binprm *bprm) +{ + free_arg_pages(bprm); + if (bprm->cred) { + mutex_unlock(¤t->cred_guard_mutex); + abort_creds(bprm->cred); + } + kfree(bprm); +} /* * install the new credentials for this executable @@ -1043,24 +1093,24 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; - - /* cred_exec_mutex must be held at least to this point to prevent + /* + * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's - * credentials; any time after this it may be unlocked */ - + * credentials; any time after this it may be unlocked. + */ security_bprm_committed_creds(bprm); + mutex_unlock(¤t->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_exec_mutex to protect against + * - the caller must hold current->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; - unsigned long flags; unsigned n_fs; int res = 0; @@ -1068,11 +1118,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) n_fs = 1; write_lock(&p->fs->lock); - lock_task_sighand(p, &flags); + rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) n_fs++; } + rcu_read_unlock(); if (p->fs->users > n_fs) { bprm->unsafe |= LSM_UNSAFE_SHARE; @@ -1083,8 +1134,6 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - - unlock_task_sighand(p, &flags); write_unlock(&p->fs->lock); return res; @@ -1197,9 +1246,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = security_bprm_check(bprm); if (retval) return retval; - retval = ima_bprm_check(bprm); - if (retval) - return retval; /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. */ @@ -1267,14 +1313,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) EXPORT_SYMBOL(search_binary_handler); -void free_bprm(struct linux_binprm *bprm) -{ - free_arg_pages(bprm); - if (bprm->cred) - abort_creds(bprm->cred); - kfree(bprm); -} - /* * sys_execve() executes a new program. */ @@ -1298,20 +1336,15 @@ int do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); - if (retval < 0) + retval = prepare_bprm_creds(bprm); + if (retval) goto out_free; - current->in_execve = 1; - - retval = -ENOMEM; - bprm->cred = prepare_exec_creds(); - if (!bprm->cred) - goto out_unlock; retval = check_unsafe_exec(bprm); if (retval < 0) - goto out_unlock; + goto out_free; clear_in_exec = retval; + current->in_execve = 1; file = open_exec(filename); retval = PTR_ERR(file); @@ -1361,7 +1394,6 @@ int do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1381,10 +1413,7 @@ out_file: out_unmark: if (clear_in_exec) current->fs->in_exec = 0; - -out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); out_free: free_bprm(bprm); @@ -1396,18 +1425,16 @@ out_ret: return retval; } -int set_binfmt(struct linux_binfmt *new) +void set_binfmt(struct linux_binfmt *new) { - struct linux_binfmt *old = current->binfmt; + struct mm_struct *mm = current->mm; - if (new) { - if (!try_module_get(new->module)) - return -1; - } - current->binfmt = new; - if (old) - module_put(old->module); - return 0; + if (mm->binfmt) + module_put(mm->binfmt->module); + + mm->binfmt = new; + if (new) + __module_get(new->module); } EXPORT_SYMBOL(set_binfmt); @@ -1508,7 +1535,7 @@ static int format_corename(char *corename, long signr) /* core limit size */ case 'c': rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur); + "%lu", rlimit(RLIMIT_CORE)); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1536,12 +1563,13 @@ out: return ispipe; } -static int zap_process(struct task_struct *start) +static int zap_process(struct task_struct *start, int exit_code) { struct task_struct *t; int nr = 0; start->signal->flags = SIGNAL_GROUP_EXIT; + start->signal->group_exit_code = exit_code; start->signal->group_stop_count = 0; t = start; @@ -1566,8 +1594,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; - tsk->signal->group_exit_code = exit_code; - nr = zap_process(tsk); + nr = zap_process(tsk, exit_code); } spin_unlock_irq(&tsk->sighand->siglock); if (unlikely(nr < 0)) @@ -1616,7 +1643,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (p->mm) { if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); - nr += zap_process(p); + nr += zap_process(p, exit_code); unlock_task_sighand(p, &flags); } break; @@ -1634,12 +1661,15 @@ static int coredump_wait(int exit_code, struct core_state *core_state) struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; struct completion *vfork_done; - int core_waiters; + int core_waiters = -EBUSY; init_completion(&core_state->startup); core_state->dumper.task = tsk; core_state->dumper.next = NULL; - core_waiters = zap_threads(tsk, mm, core_state, exit_code); + + down_write(&mm->mmap_sem); + if (!mm->core_state) + core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1723,69 +1753,135 @@ void set_dumpable(struct mm_struct *mm, int value) } } -int get_dumpable(struct mm_struct *mm) +static int __get_dumpable(unsigned long mm_flags) { int ret; - ret = mm->flags & 0x3; + ret = mm_flags & MMF_DUMPABLE_MASK; return (ret >= 2) ? 2 : ret; } +int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +static void wait_for_dump_helpers(struct file *file) +{ + struct pipe_inode_info *pipe; + + pipe = file->f_path.dentry->d_inode->i_pipe; + + pipe_lock(pipe); + pipe->readers++; + pipe->writers--; + + while ((pipe->readers > 1) && (!signal_pending(current))) { + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_wait(pipe); + } + + pipe->readers--; + pipe->writers++; + pipe_unlock(pipe); + +} + + +/* + * uhm_pipe_setup + * helper function to customize the process used + * to collect the core in userspace. Specifically + * it sets up a pipe and installs it as fd 0 (stdin) + * for the process. Returns 0 on success, or + * PTR_ERR on failure. + * Note that it also sets the core limit to 1. This + * is a special value that we use to trap recursive + * core dumps + */ +static int umh_pipe_setup(struct subprocess_info *info) +{ + struct file *rp, *wp; + struct fdtable *fdt; + struct coredump_params *cp = (struct coredump_params *)info->data; + struct files_struct *cf = current->files; + + wp = create_write_pipe(0); + if (IS_ERR(wp)) + return PTR_ERR(wp); + + rp = create_read_pipe(wp, 0); + if (IS_ERR(rp)) { + free_write_pipe(wp); + return PTR_ERR(rp); + } + + cp->file = wp; + + sys_close(0); + fd_install(0, rp); + spin_lock(&cf->file_lock); + fdt = files_fdtable(cf); + FD_SET(0, fdt->open_fds); + FD_CLR(0, fdt->close_on_exec); + spin_unlock(&cf->file_lock); + + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; + + return 0; +} + void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; char corename[CORENAME_MAX_SIZE + 1]; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; - struct inode * inode; - struct file * file; const struct cred *old_cred; struct cred *cred; int retval = 0; int flag = 0; - int ispipe = 0; - unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; - char **helper_argv = NULL; - int helper_argc = 0; - char *delimit; + int ispipe; + static atomic_t core_dump_count = ATOMIC_INIT(0); + struct coredump_params cprm = { + .signr = signr, + .regs = regs, + .limit = rlimit(RLIMIT_CORE), + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency of bit flags, since this flag is not protected + * by any locks. + */ + .mm_flags = mm->flags, + }; audit_core_dumps(signr); - binfmt = current->binfmt; + binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; + if (!__get_dumpable(cprm.mm_flags)) + goto fail; cred = prepare_creds(); - if (!cred) { - retval = -ENOMEM; + if (!cred) goto fail; - } - - down_write(&mm->mmap_sem); - /* - * If another thread got here first, or we are not dumpable, bail out. - */ - if (mm->core_state || !get_dumpable(mm)) { - up_write(&mm->mmap_sem); - put_cred(cred); - goto fail; - } - /* * We cannot trust fsuid as being the "true" uid of the * process nor do we know its entire history. We only know it * was tainted so we dump it as root in mode 2. */ - if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ + if (__get_dumpable(cprm.mm_flags) == 2) { + /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); - if (retval < 0) { - put_cred(cred); - goto fail; - } + if (retval < 0) + goto fail_creds; old_cred = override_creds(cred); @@ -1802,90 +1898,110 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) lock_kernel(); ispipe = format_corename(corename, signr); unlock_kernel(); - /* - * Don't bother to check the RLIMIT_CORE value if core_pattern points - * to a pipe. Since we're not writing directly to the filesystem - * RLIMIT_CORE doesn't really apply, as no actual core file will be - * created unless the pipe reader choses to write out the core file - * at which point file size limits and permissions will be imposed - * as it does with any other process - */ - if ((!ispipe) && (core_limit < binfmt->min_coredump)) - goto fail_unlock; if (ispipe) { - helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); - if (!helper_argv) { - printk(KERN_WARNING "%s failed to allocate memory\n", - __func__); + int dump_count; + char **helper_argv; + + if (cprm.limit == 1) { + /* + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a speacial value. Any + * non-1 limit gets set to RLIM_INFINITY below, but + * a limit of 0 skips the dump. This is a consistent + * way to catch recursive crashes. We can still crash + * if the core_pattern binary sets RLIM_CORE = !1 + * but it runs as root, and can do lots of stupid things + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + printk(KERN_WARNING + "Process %d(%s) has RLIMIT_CORE set to 1\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Aborting core\n"); goto fail_unlock; } - /* Terminate the string before the first option */ - delimit = strchr(corename, ' '); - if (delimit) - *delimit = '\0'; - delimit = strrchr(helper_argv[0], '/'); - if (delimit) - delimit++; - else - delimit = helper_argv[0]; - if (!strcmp(delimit, current->comm)) { - printk(KERN_NOTICE "Recursive core dump detected, " - "aborting\n"); - goto fail_unlock; + cprm.limit = RLIM_INFINITY; + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_dropcount; } - core_limit = RLIM_INFINITY; + helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); + if (!helper_argv) { + printk(KERN_WARNING "%s failed to allocate memory\n", + __func__); + goto fail_dropcount; + } - /* SIGPIPE can happen, but it's just never processed */ - if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, - &file)) { + retval = call_usermodehelper_fns(helper_argv[0], helper_argv, + NULL, UMH_WAIT_EXEC, umh_pipe_setup, + NULL, &cprm); + argv_free(helper_argv); + if (retval) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); - goto fail_unlock; + goto close_fail; } - } else - file = filp_open(corename, + } else { + struct inode *inode; + + if (cprm.limit < binfmt->min_coredump) + goto fail_unlock; + + cprm.file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); - if (IS_ERR(file)) - goto fail_unlock; - inode = file->f_path.dentry->d_inode; - if (inode->i_nlink > 1) - goto close_fail; /* multiple links - don't dump */ - if (!ispipe && d_unhashed(file->f_path.dentry)) - goto close_fail; - - /* AK: actually i see no reason to not allow this for named pipes etc., - but keep the previous behaviour for now. */ - if (!ispipe && !S_ISREG(inode->i_mode)) - goto close_fail; - /* - * Dont allow local users get cute and trick others to coredump - * into their pre-created files: - */ - if (inode->i_uid != current_fsuid()) - goto close_fail; - if (!file->f_op) - goto close_fail; - if (!file->f_op->write) - goto close_fail; - if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) - goto close_fail; + if (IS_ERR(cprm.file)) + goto fail_unlock; - retval = binfmt->core_dump(signr, regs, file, core_limit); + inode = cprm.file->f_path.dentry->d_inode; + if (inode->i_nlink > 1) + goto close_fail; + if (d_unhashed(cprm.file->f_path.dentry)) + goto close_fail; + /* + * AK: actually i see no reason to not allow this for named + * pipes etc, but keep the previous behaviour for now. + */ + if (!S_ISREG(inode->i_mode)) + goto close_fail; + /* + * Dont allow local users get cute and trick others to coredump + * into their pre-created files. + */ + if (inode->i_uid != current_fsuid()) + goto close_fail; + if (!cprm.file->f_op || !cprm.file->f_op->write) + goto close_fail; + if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + goto close_fail; + } + retval = binfmt->core_dump(&cprm); if (retval) current->signal->group_exit_code |= 0x80; + + if (ispipe && core_pipe_limit) + wait_for_dump_helpers(cprm.file); close_fail: - filp_close(file, NULL); + if (cprm.file) + filp_close(cprm.file, NULL); +fail_dropcount: + if (ispipe) + atomic_dec(&core_dump_count); fail_unlock: - if (helper_argv) - argv_free(helper_argv); - + coredump_finish(mm); revert_creds(old_cred); +fail_creds: put_cred(cred); - coredump_finish(mm); fail: return; }