X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Ffork.c;h=b9e2edd00726538467877d458d7aeb7771c03015;hb=8e19608e8b5c001e4a66ce482edc474f05fb7355;hp=7407ab319875b638cc1779fffd3d30e4741c420f;hpb=e8e1abe92fd7ea9d823a3aaf81d10e2cba593b6b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/fork.c b/kernel/fork.c index 7407ab3..b9e2edd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,7 +60,9 @@ #include #include #include +#include #include +#include #include #include @@ -151,9 +153,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + put_cred(tsk->real_cred); + put_cred(tsk->cred); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -213,6 +214,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; + unsigned long *stackend; + int err; prepare_to_copy(orig); @@ -238,6 +241,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto out; setup_thread_stack(tsk, orig); + stackend = end_of_stack(tsk); + *stackend = STACK_END_MAGIC; /* for overflow detection */ #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_int(); @@ -280,7 +285,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; - cpus_clear(mm->cpu_vm_mask); + cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; @@ -319,17 +324,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; + struct address_space *mapping = file->f_mapping; + get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - - /* insert tmp into the share list, just after mpnt */ - spin_lock(&file->f_mapping->i_mmap_lock); + spin_lock(&mapping->i_mmap_lock); + if (tmp->vm_flags & VM_SHARED) + mapping->i_mmap_writable++; tmp->vm_truncate_count = mpnt->vm_truncate_count; - flush_dcache_mmap_lock(file->f_mapping); + flush_dcache_mmap_lock(mapping); + /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); - flush_dcache_mmap_unlock(file->f_mapping); - spin_unlock(&file->f_mapping->i_mmap_lock); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); } /* @@ -398,6 +406,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; + +static int __init coredump_filter_setup(char *s) +{ + default_dump_filter = + (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & + MMF_DUMP_FILTER_MASK; + return 1; +} + +__setup("coredump_filter=", coredump_filter_setup); + #include static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) @@ -406,15 +426,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? current->mm->flags - : MMF_DUMP_FILTER_DEFAULT; + mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); - rwlock_init(&mm->ioctx_list_lock); - mm->ioctx_list = NULL; + spin_lock_init(&mm->ioctx_lock); + INIT_HLIST_HEAD(&mm->ioctx_list); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; mm_init_owner(mm, p); @@ -626,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; +#ifdef CONFIG_DETECT_HUNG_TASK + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; +#endif tsk->mm = NULL; tsk->active_mm = NULL; @@ -663,38 +685,21 @@ fail_nomem: return retval; } -static struct fs_struct *__copy_fs_struct(struct fs_struct *old) -{ - struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); - /* We don't need to lock fs - think why ;-) */ - if (fs) { - atomic_set(&fs->count, 1); - rwlock_init(&fs->lock); - fs->umask = old->umask; - read_lock(&old->lock); - fs->root = old->root; - path_get(&old->root); - fs->pwd = old->pwd; - path_get(&old->pwd); - read_unlock(&old->lock); - } - return fs; -} - -struct fs_struct *copy_fs_struct(struct fs_struct *old) -{ - return __copy_fs_struct(old); -} - -EXPORT_SYMBOL_GPL(copy_fs_struct); - static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { + struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { - atomic_inc(¤t->fs->count); + /* tsk->fs is already what we want */ + write_lock(&fs->lock); + if (fs->in_exec) { + write_unlock(&fs->lock); + return -EAGAIN; + } + fs->users++; + write_unlock(&fs->lock); return 0; } - tsk->fs = __copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -756,7 +761,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; - if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { + if (clone_flags & CLONE_SIGHAND) { atomic_inc(¤t->sighand->count); return 0; } @@ -795,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) sig->cputime_expires.virt_exp = cputime_zero; sig->cputime_expires.sched_exp = 0; + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + sig->cputime_expires.prof_exp = + secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + sig->cputimer.running = 1; + } + /* The timer lists. */ INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -804,31 +815,24 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; - int ret; if (clone_flags & CLONE_THREAD) { - ret = thread_group_cputime_clone_thread(current); - if (likely(!ret)) { - atomic_inc(¤t->signal->count); - atomic_inc(¤t->signal->live); - } - return ret; + atomic_inc(¤t->signal->count); + atomic_inc(¤t->signal->live); + return 0; } + sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; - ret = copy_thread_group_keys(tsk); - if (ret < 0) { - kmem_cache_free(signal_cachep, sig); - return ret; - } - atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; + if (clone_flags & CLONE_NEWPID) + sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; @@ -844,13 +848,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->tty_old_pgrp = NULL; sig->tty = NULL; - sig->cutime = sig->cstime = cputime_zero; + sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; task_io_accounting_init(&sig->ioac); + sig->sum_sched_runtime = 0; taskstats_tgid_init(sig); task_lock(current->group_leader); @@ -869,7 +874,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { thread_group_cputime_free(sig); - exit_thread_group_keys(sig); tty_kref_put(sig->tty); kmem_cache_free(signal_cachep, sig); } @@ -895,7 +899,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) clear_freeze_flag(p); } -asmlinkage long sys_set_tid_address(int __user *tidptr) +SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -985,22 +989,23 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != INIT_USER) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ + retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; @@ -1035,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->default_timer_slack_ns = current->timer_slack_ns; -#ifdef CONFIG_DETECT_SOFTLOCKUP - p->last_switch_count = 0; - p->last_switch_timestamp = 0; -#endif - task_io_accounting_init(&p->ioac); acct_clear_integrals(p); @@ -1049,10 +1049,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); -#ifdef CONFIG_SECURITY - p->security = NULL; -#endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1093,14 +1089,14 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif + if (unlikely(current->ptrace)) + ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) - goto bad_fork_cleanup_security; + goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; @@ -1114,24 +1110,22 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; - if ((retval = copy_keys(clone_flags, p))) - goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) - goto bad_fork_cleanup_keys; + goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; - retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); + retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(task_active_pid_ns(p)); + pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { - retval = pid_ns_prepare_proc(task_active_pid_ns(p)); + retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } @@ -1179,10 +1173,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif clear_all_latency_tracing(p); - /* Our parent execution domain becomes current domain - These must match for thread signalling to apply */ - p->parent_exec_id = p->self_exec_id; - /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; @@ -1220,10 +1210,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ - if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; - else + p->parent_exec_id = current->parent_exec_id; + } else { p->real_parent = current; + p->parent_exec_id = current->self_exec_id; + } spin_lock(¤t->sighand->siglock); @@ -1259,8 +1252,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); - set_task_pgrp(p, task_pgrp_nr(current)); - set_task_session(p, task_session_nr(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); @@ -1286,8 +1277,6 @@ bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); -bad_fork_cleanup_keys: - exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); @@ -1303,8 +1292,6 @@ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); -bad_fork_cleanup_security: - security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); @@ -1317,9 +1304,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + atomic_dec(&p->cred->user->processes); + put_cred(p->real_cred); + put_cred(p->cred); bad_fork_free: free_task(p); fork_out: @@ -1363,6 +1350,21 @@ long do_fork(unsigned long clone_flags, long nr; /* + * Do some preliminary argument and permissions checking before we + * actually start allocating stuff + */ + if (clone_flags & CLONE_NEWUSER) { + if (clone_flags & CLONE_THREAD) + return -EINVAL; + /* hopefully this check will go away when userns support is + * complete + */ + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || + !capable(CAP_SETGID)) + return -EPERM; + } + + /* * We hope to recycle these flags after 2.6.26 */ if (unlikely(clone_flags & CLONE_STOPPED)) { @@ -1406,6 +1408,7 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + audit_finish_fork(p); tracehook_report_clone(trace, regs, clone_flags, nr, p); /* @@ -1469,12 +1472,11 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); + mmap_init(); } /* @@ -1529,12 +1531,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; - if ((unshare_flags & CLONE_FS) && - (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = __copy_fs_struct(current->fs); - if (!*new_fsp) - return -ENOMEM; - } + if (!(unshare_flags & CLONE_FS) || !fs) + return 0; + + /* don't need lock here; in the worst case we'll do useless copy */ + if (fs->users == 1) + return 0; + + *new_fsp = copy_fs_struct(fs); + if (!*new_fsp) + return -ENOMEM; return 0; } @@ -1593,7 +1599,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -asmlinkage long sys_unshare(unsigned long unshare_flags) +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; @@ -1609,8 +1615,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| - CLONE_NEWNET)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* @@ -1651,8 +1656,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) if (new_fs) { fs = current->fs; + write_lock(&fs->lock); current->fs = new_fs; - new_fs = fs; + if (--fs->users) + new_fs = NULL; + else + new_fs = fs; + write_unlock(&fs->lock); } if (new_mm) { @@ -1691,7 +1701,7 @@ bad_unshare_cleanup_sigh: bad_unshare_cleanup_fs: if (new_fs) - put_fs_struct(new_fs); + free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: