signals: protect cinit from unblocked SIG_DFL signals
[safe/jmp/linux-2.6] / kernel / signal.c
1 /*
2  *  linux/kernel/signal.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
7  *
8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
9  *              Changes to use preallocated sigqueue structures
10  *              to allow signals to be sent reliably.
11  */
12
13 #include <linux/slab.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched.h>
17 #include <linux/fs.h>
18 #include <linux/tty.h>
19 #include <linux/binfmts.h>
20 #include <linux/security.h>
21 #include <linux/syscalls.h>
22 #include <linux/ptrace.h>
23 #include <linux/signal.h>
24 #include <linux/signalfd.h>
25 #include <linux/tracehook.h>
26 #include <linux/capability.h>
27 #include <linux/freezer.h>
28 #include <linux/pid_namespace.h>
29 #include <linux/nsproxy.h>
30 #include <trace/sched.h>
31
32 #include <asm/param.h>
33 #include <asm/uaccess.h>
34 #include <asm/unistd.h>
35 #include <asm/siginfo.h>
36 #include "audit.h"      /* audit_signal_info() */
37
38 /*
39  * SLAB caches for signal bits.
40  */
41
42 static struct kmem_cache *sigqueue_cachep;
43
44 DEFINE_TRACE(sched_signal_send);
45
46 static void __user *sig_handler(struct task_struct *t, int sig)
47 {
48         return t->sighand->action[sig - 1].sa.sa_handler;
49 }
50
51 static int sig_handler_ignored(void __user *handler, int sig)
52 {
53         /* Is it explicitly or implicitly ignored? */
54         return handler == SIG_IGN ||
55                 (handler == SIG_DFL && sig_kernel_ignore(sig));
56 }
57
58 static int sig_task_ignored(struct task_struct *t, int sig,
59                 int from_ancestor_ns)
60 {
61         void __user *handler;
62
63         handler = sig_handler(t, sig);
64
65         if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
66                         handler == SIG_DFL && !from_ancestor_ns)
67                 return 1;
68
69         return sig_handler_ignored(handler, sig);
70 }
71
72 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
73 {
74         /*
75          * Blocked signals are never ignored, since the
76          * signal handler may change by the time it is
77          * unblocked.
78          */
79         if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
80                 return 0;
81
82         if (!sig_task_ignored(t, sig, from_ancestor_ns))
83                 return 0;
84
85         /*
86          * Tracers may want to know about even ignored signals.
87          */
88         return !tracehook_consider_ignored_signal(t, sig);
89 }
90
91 /*
92  * Re-calculate pending state from the set of locally pending
93  * signals, globally pending signals, and blocked signals.
94  */
95 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
96 {
97         unsigned long ready;
98         long i;
99
100         switch (_NSIG_WORDS) {
101         default:
102                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
103                         ready |= signal->sig[i] &~ blocked->sig[i];
104                 break;
105
106         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
107                 ready |= signal->sig[2] &~ blocked->sig[2];
108                 ready |= signal->sig[1] &~ blocked->sig[1];
109                 ready |= signal->sig[0] &~ blocked->sig[0];
110                 break;
111
112         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
113                 ready |= signal->sig[0] &~ blocked->sig[0];
114                 break;
115
116         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
117         }
118         return ready != 0;
119 }
120
121 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
122
123 static int recalc_sigpending_tsk(struct task_struct *t)
124 {
125         if (t->signal->group_stop_count > 0 ||
126             PENDING(&t->pending, &t->blocked) ||
127             PENDING(&t->signal->shared_pending, &t->blocked)) {
128                 set_tsk_thread_flag(t, TIF_SIGPENDING);
129                 return 1;
130         }
131         /*
132          * We must never clear the flag in another thread, or in current
133          * when it's possible the current syscall is returning -ERESTART*.
134          * So we don't clear it here; clearing is left to callers that know it is safe.
135          */
136         return 0;
137 }
138
139 /*
140  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
141  * This is superfluous when called on current; the wakeup is a harmless no-op.
142  */
143 void recalc_sigpending_and_wake(struct task_struct *t)
144 {
145         if (recalc_sigpending_tsk(t))
146                 signal_wake_up(t, 0);
147 }
148
149 void recalc_sigpending(void)
150 {
151         if (unlikely(tracehook_force_sigpending()))
152                 set_thread_flag(TIF_SIGPENDING);
153         else if (!recalc_sigpending_tsk(current) && !freezing(current))
154                 clear_thread_flag(TIF_SIGPENDING);
155
156 }
157
158 /* Given the mask, find the first available signal that should be serviced. */
159
160 int next_signal(struct sigpending *pending, sigset_t *mask)
161 {
162         unsigned long i, *s, *m, x;
163         int sig = 0;
164         
165         s = pending->signal.sig;
166         m = mask->sig;
167         switch (_NSIG_WORDS) {
168         default:
169                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
170                         if ((x = *s &~ *m) != 0) {
171                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
172                                 break;
173                         }
174                 break;
175
176         case 2: if ((x = s[0] &~ m[0]) != 0)
177                         sig = 1;
178                 else if ((x = s[1] &~ m[1]) != 0)
179                         sig = _NSIG_BPW + 1;
180                 else
181                         break;
182                 sig += ffz(~x);
183                 break;
184
185         case 1: if ((x = *s &~ *m) != 0)
186                         sig = ffz(~x) + 1;
187                 break;
188         }
189         
190         return sig;
191 }
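/*
 * Illustrative only (not kernel code): a minimal userspace sketch of the
 * word-scan idea used by next_signal() above -- the first deliverable signal
 * is the lowest set bit of (pending & ~blocked), numbered from 1.  The names
 * WORDS and BITS_PER_WORD are local to the example, not kernel definitions.
 */
#include <stdio.h>

#define WORDS 2
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static int first_deliverable(const unsigned long *pending,
                             const unsigned long *blocked)
{
        unsigned long x;
        unsigned int i, bit;

        for (i = 0; i < WORDS; i++) {
                x = pending[i] & ~blocked[i];
                if (!x)
                        continue;
                for (bit = 0; bit < BITS_PER_WORD; bit++)
                        if (x & (1UL << bit))
                                return i * BITS_PER_WORD + bit + 1;
        }
        return 0;                               /* nothing deliverable */
}

int main(void)
{
        unsigned long pending[WORDS] = { (1UL << (2 - 1)) | (1UL << (15 - 1)), 0 };
        unsigned long blocked[WORDS] = { (1UL << (2 - 1)), 0 };

        printf("next signal: %d\n", first_deliverable(pending, blocked)); /* 15 */
        return 0;
}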
192
193 /*
194  * allocate a new signal queue record
195  * - this may be called without locks if and only if t == current, otherwise an
196  *   appropriate lock must be held to stop the target task from exiting
197  */
198 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
199                                          int override_rlimit)
200 {
201         struct sigqueue *q = NULL;
202         struct user_struct *user;
203
204         /*
205          * We won't get problems with the target's UID changing under us
206          * because changing it requires RCU be used, and if t != current, the
207          * caller must be holding the RCU readlock (by way of a spinlock) and
208          * we use RCU protection here
209          */
210         user = get_uid(__task_cred(t)->user);
211         atomic_inc(&user->sigpending);
212         if (override_rlimit ||
213             atomic_read(&user->sigpending) <=
214                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
215                 q = kmem_cache_alloc(sigqueue_cachep, flags);
216         if (unlikely(q == NULL)) {
217                 atomic_dec(&user->sigpending);
218                 free_uid(user);
219         } else {
220                 INIT_LIST_HEAD(&q->list);
221                 q->flags = 0;
222                 q->user = user;
223         }
224
225         return q;
226 }
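/*
 * Illustrative userspace sketch (not kernel code): the per-user sigpending
 * accounting in __sigqueue_alloc() above is what makes sigqueue(3) fail with
 * EAGAIN once roughly RLIMIT_SIGPENDING entries are already queued for the
 * user.  The limit value chosen here is arbitrary and error handling is
 * abbreviated.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

int main(void)
{
        struct rlimit rl = { .rlim_cur = 4, .rlim_max = 4 };
        union sigval val = { .sival_int = 0 };
        sigset_t set;
        int i;

        sigemptyset(&set);
        sigaddset(&set, SIGRTMIN);
        sigprocmask(SIG_BLOCK, &set, NULL);     /* keep the signals queued */
        setrlimit(RLIMIT_SIGPENDING, &rl);

        for (i = 0; i < 8; i++) {
                if (sigqueue(getpid(), SIGRTMIN, val) < 0) {
                        printf("sigqueue #%d failed: %s\n", i, strerror(errno));
                        break;
                }
        }
        return 0;
}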
227
228 static void __sigqueue_free(struct sigqueue *q)
229 {
230         if (q->flags & SIGQUEUE_PREALLOC)
231                 return;
232         atomic_dec(&q->user->sigpending);
233         free_uid(q->user);
234         kmem_cache_free(sigqueue_cachep, q);
235 }
236
237 void flush_sigqueue(struct sigpending *queue)
238 {
239         struct sigqueue *q;
240
241         sigemptyset(&queue->signal);
242         while (!list_empty(&queue->list)) {
243                 q = list_entry(queue->list.next, struct sigqueue , list);
244                 list_del_init(&q->list);
245                 __sigqueue_free(q);
246         }
247 }
248
249 /*
250  * Flush all pending signals for a task.
251  */
252 void flush_signals(struct task_struct *t)
253 {
254         unsigned long flags;
255
256         spin_lock_irqsave(&t->sighand->siglock, flags);
257         clear_tsk_thread_flag(t, TIF_SIGPENDING);
258         flush_sigqueue(&t->pending);
259         flush_sigqueue(&t->signal->shared_pending);
260         spin_unlock_irqrestore(&t->sighand->siglock, flags);
261 }
262
263 static void __flush_itimer_signals(struct sigpending *pending)
264 {
265         sigset_t signal, retain;
266         struct sigqueue *q, *n;
267
268         signal = pending->signal;
269         sigemptyset(&retain);
270
271         list_for_each_entry_safe(q, n, &pending->list, list) {
272                 int sig = q->info.si_signo;
273
274                 if (likely(q->info.si_code != SI_TIMER)) {
275                         sigaddset(&retain, sig);
276                 } else {
277                         sigdelset(&signal, sig);
278                         list_del_init(&q->list);
279                         __sigqueue_free(q);
280                 }
281         }
282
283         sigorsets(&pending->signal, &signal, &retain);
284 }
285
286 void flush_itimer_signals(void)
287 {
288         struct task_struct *tsk = current;
289         unsigned long flags;
290
291         spin_lock_irqsave(&tsk->sighand->siglock, flags);
292         __flush_itimer_signals(&tsk->pending);
293         __flush_itimer_signals(&tsk->signal->shared_pending);
294         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
295 }
296
297 void ignore_signals(struct task_struct *t)
298 {
299         int i;
300
301         for (i = 0; i < _NSIG; ++i)
302                 t->sighand->action[i].sa.sa_handler = SIG_IGN;
303
304         flush_signals(t);
305 }
306
307 /*
308  * Flush all handlers for a task.
309  */
310
311 void
312 flush_signal_handlers(struct task_struct *t, int force_default)
313 {
314         int i;
315         struct k_sigaction *ka = &t->sighand->action[0];
316         for (i = _NSIG ; i != 0 ; i--) {
317                 if (force_default || ka->sa.sa_handler != SIG_IGN)
318                         ka->sa.sa_handler = SIG_DFL;
319                 ka->sa.sa_flags = 0;
320                 sigemptyset(&ka->sa.sa_mask);
321                 ka++;
322         }
323 }
324
325 int unhandled_signal(struct task_struct *tsk, int sig)
326 {
327         void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
328         if (is_global_init(tsk))
329                 return 1;
330         if (handler != SIG_IGN && handler != SIG_DFL)
331                 return 0;
332         return !tracehook_consider_fatal_signal(tsk, sig);
333 }
334
335
336 /* Notify the system that a driver wants to block all signals for this
337  * process, and wants to be notified if any signals at all were to be
338  * sent/acted upon.  If the notifier routine returns non-zero, then the
339  * signal will be acted upon after all.  If the notifier routine returns 0,
340  *  then the signal will be blocked.  Only one block per process is
341  * allowed.  priv is a pointer to private data that the notifier routine
342  * can use to determine if the signal should be blocked or not.  */
343
344 void
345 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
346 {
347         unsigned long flags;
348
349         spin_lock_irqsave(&current->sighand->siglock, flags);
350         current->notifier_mask = mask;
351         current->notifier_data = priv;
352         current->notifier = notifier;
353         spin_unlock_irqrestore(&current->sighand->siglock, flags);
354 }
355
356 /* Notify the system that blocking has ended. */
357
358 void
359 unblock_all_signals(void)
360 {
361         unsigned long flags;
362
363         spin_lock_irqsave(&current->sighand->siglock, flags);
364         current->notifier = NULL;
365         current->notifier_data = NULL;
366         recalc_sigpending();
367         spin_unlock_irqrestore(&current->sighand->siglock, flags);
368 }
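/*
 * Illustrative sketch (not part of this file): how a driver might use the
 * notifier interface above.  The callback decides per delivery whether the
 * signal is really acted upon; returning 0 keeps it blocked.  The structure
 * "example_state", its "critical" field, and the function names are
 * hypothetical; the mask must stay valid while the block is in effect.
 */
struct example_state {
        int critical;
};

static int example_notifier(void *priv)
{
        struct example_state *st = priv;

        /* Act on the signal only when we are not in a critical phase. */
        return !st->critical;
}

static void example_enter_critical(struct example_state *st, sigset_t *mask)
{
        sigemptyset(mask);
        sigaddset(mask, SIGINT);
        sigaddset(mask, SIGTERM);

        st->critical = 1;
        block_all_signals(example_notifier, st, mask);
}

static void example_leave_critical(struct example_state *st)
{
        st->critical = 0;
        unblock_all_signals();
}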
369
370 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
371 {
372         struct sigqueue *q, *first = NULL;
373
374         /*
375          * Collect the siginfo appropriate to this signal.  Check if
376          * there is another siginfo for the same signal.
377         */
378         list_for_each_entry(q, &list->list, list) {
379                 if (q->info.si_signo == sig) {
380                         if (first)
381                                 goto still_pending;
382                         first = q;
383                 }
384         }
385
386         sigdelset(&list->signal, sig);
387
388         if (first) {
389 still_pending:
390                 list_del_init(&first->list);
391                 copy_siginfo(info, &first->info);
392                 __sigqueue_free(first);
393         } else {
394                 /* Ok, it wasn't in the queue.  This must be
395                    a fast-pathed signal or we must have been
396                    out of queue space.  So zero out the info.
397                  */
398                 info->si_signo = sig;
399                 info->si_errno = 0;
400                 info->si_code = 0;
401                 info->si_pid = 0;
402                 info->si_uid = 0;
403         }
404 }
405
406 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
407                         siginfo_t *info)
408 {
409         int sig = next_signal(pending, mask);
410
411         if (sig) {
412                 if (current->notifier) {
413                         if (sigismember(current->notifier_mask, sig)) {
414                                 if (!(current->notifier)(current->notifier_data)) {
415                                         clear_thread_flag(TIF_SIGPENDING);
416                                         return 0;
417                                 }
418                         }
419                 }
420
421                 collect_signal(sig, pending, info);
422         }
423
424         return sig;
425 }
426
427 /*
428  * Dequeue a signal and return the element to the caller, which is 
429  * expected to free it.
430  *
431  * All callers have to hold the siglock.
432  */
433 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
434 {
435         int signr;
436
437         /* We only dequeue private signals from ourselves, we don't let
438          * signalfd steal them
439          */
440         signr = __dequeue_signal(&tsk->pending, mask, info);
441         if (!signr) {
442                 signr = __dequeue_signal(&tsk->signal->shared_pending,
443                                          mask, info);
444                 /*
445                  * itimer signal ?
446                  *
447                  * itimers are process shared and we restart periodic
448                  * itimers in the signal delivery path to prevent DoS
449                  * attacks in the high resolution timer case. This is
450                  * compliant with the old way of self restarting
451                  * itimers, as the SIGALRM is a legacy signal and only
452                  * queued once. Changing the restart behaviour to
453          * restart the timer in the signal dequeue path also
454          * reduces the timer noise on heavily loaded !highres
455          * systems.
456                  */
457                 if (unlikely(signr == SIGALRM)) {
458                         struct hrtimer *tmr = &tsk->signal->real_timer;
459
460                         if (!hrtimer_is_queued(tmr) &&
461                             tsk->signal->it_real_incr.tv64 != 0) {
462                                 hrtimer_forward(tmr, tmr->base->get_time(),
463                                                 tsk->signal->it_real_incr);
464                                 hrtimer_restart(tmr);
465                         }
466                 }
467         }
468
469         recalc_sigpending();
470         if (!signr)
471                 return 0;
472
473         if (unlikely(sig_kernel_stop(signr))) {
474                 /*
475                  * Set a marker that we have dequeued a stop signal.  Our
476                  * caller might release the siglock and then the pending
477                  * stop signal it is about to process is no longer in the
478                  * pending bitmasks, but must still be cleared by a SIGCONT
479                  * (and overruled by a SIGKILL).  So those cases clear this
480                  * shared flag after we've set it.  Note that this flag may
481                  * remain set after the signal we return is ignored or
482                  * handled.  That doesn't matter because its only purpose
483                  * is to alert stop-signal processing code when another
484                  * processor has come along and cleared the flag.
485                  */
486                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
487         }
488         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
489                 /*
490                  * Release the siglock to ensure proper locking order
491                  * of timer locks outside of siglocks.  Note, we leave
492                  * irqs disabled here, since the posix-timers code is
493                  * about to disable them again anyway.
494                  */
495                 spin_unlock(&tsk->sighand->siglock);
496                 do_schedule_next_timer(info);
497                 spin_lock(&tsk->sighand->siglock);
498         }
499         return signr;
500 }
501
502 /*
503  * Tell a process that it has a new active signal..
504  *
505  * NOTE! we rely on the previous spin_lock to
506  * lock interrupts for us! We can only be called with
507  * "siglock" held, and the local interrupt must
508  * have been disabled when that got acquired!
509  *
510  * No need to set need_resched since signal event passing
511  * goes through ->blocked
512  */
513 void signal_wake_up(struct task_struct *t, int resume)
514 {
515         unsigned int mask;
516
517         set_tsk_thread_flag(t, TIF_SIGPENDING);
518
519         /*
520          * For SIGKILL, we want to wake it up in the stopped/traced/killable
521          * case. We don't check t->state here because there is a race with it
522          * executing on another processor and just now entering stopped state.
523          * By using wake_up_state, we ensure the process will wake up and
524          * handle its death signal.
525          */
526         mask = TASK_INTERRUPTIBLE;
527         if (resume)
528                 mask |= TASK_WAKEKILL;
529         if (!wake_up_state(t, mask))
530                 kick_process(t);
531 }
532
533 /*
534  * Remove signals in mask from the pending set and queue.
535  * Returns 1 if any signals were found.
536  *
537  * All callers must be holding the siglock.
538  *
539  * This version takes a sigset mask and looks at all signals,
540  * not just those in the first mask word.
541  */
542 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
543 {
544         struct sigqueue *q, *n;
545         sigset_t m;
546
547         sigandsets(&m, mask, &s->signal);
548         if (sigisemptyset(&m))
549                 return 0;
550
551         signandsets(&s->signal, &s->signal, mask);
552         list_for_each_entry_safe(q, n, &s->list, list) {
553                 if (sigismember(mask, q->info.si_signo)) {
554                         list_del_init(&q->list);
555                         __sigqueue_free(q);
556                 }
557         }
558         return 1;
559 }
560 /*
561  * Remove signals in mask from the pending set and queue.
562  * Returns 1 if any signals were found.
563  *
564  * All callers must be holding the siglock.
565  */
566 static int rm_from_queue(unsigned long mask, struct sigpending *s)
567 {
568         struct sigqueue *q, *n;
569
570         if (!sigtestsetmask(&s->signal, mask))
571                 return 0;
572
573         sigdelsetmask(&s->signal, mask);
574         list_for_each_entry_safe(q, n, &s->list, list) {
575                 if (q->info.si_signo < SIGRTMIN &&
576                     (mask & sigmask(q->info.si_signo))) {
577                         list_del_init(&q->list);
578                         __sigqueue_free(q);
579                 }
580         }
581         return 1;
582 }
583
584 /*
585  * Bad permissions for sending the signal
586  * - the caller must hold at least the RCU read lock
587  */
588 static int check_kill_permission(int sig, struct siginfo *info,
589                                  struct task_struct *t)
590 {
591         const struct cred *cred = current_cred(), *tcred;
592         struct pid *sid;
593         int error;
594
595         if (!valid_signal(sig))
596                 return -EINVAL;
597
598         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
599                 return 0;
600
601         error = audit_signal_info(sig, t); /* Let audit system see the signal */
602         if (error)
603                 return error;
604
605         tcred = __task_cred(t);
606         if ((cred->euid ^ tcred->suid) &&
607             (cred->euid ^ tcred->uid) &&
608             (cred->uid  ^ tcred->suid) &&
609             (cred->uid  ^ tcred->uid) &&
610             !capable(CAP_KILL)) {
611                 switch (sig) {
612                 case SIGCONT:
613                         sid = task_session(t);
614                         /*
615                          * We don't return the error if sid == NULL. The
616                          * task was unhashed, the caller must notice this.
617                          */
618                         if (!sid || sid == task_session(current))
619                                 break;
620                 default:
621                         return -EPERM;
622                 }
623         }
624
625         return security_task_kill(t, info, sig, 0);
626 }
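/*
 * Illustrative userspace sketch (not kernel code): the credential checks in
 * check_kill_permission() above are what kill(2) reports as EPERM.  Signal 0
 * is the classic way to probe them, since it runs the existence and
 * permission checks without delivering anything.  "target_pid" is a
 * hypothetical value.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static void probe(pid_t target_pid)
{
        if (kill(target_pid, 0) == 0)
                printf("pid %d exists and we may signal it\n", target_pid);
        else if (errno == EPERM)
                printf("pid %d exists, but our euid/uid match neither its uid nor its suid\n",
                       target_pid);
        else if (errno == ESRCH)
                printf("pid %d does not exist\n", target_pid);
        else
                printf("kill: %s\n", strerror(errno));
}

int main(void)
{
        probe(1);               /* init: usually EPERM for ordinary users */
        return 0;
}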
627
628 /*
629  * Handle magic process-wide effects of stop/continue signals. Unlike
630  * the signal actions, these happen immediately at signal-generation
631  * time regardless of blocking, ignoring, or handling.  This does the
632  * actual continuing for SIGCONT, but not the actual stopping for stop
633  * signals. The process stop is done as a signal action for SIG_DFL.
634  *
635  * Returns true if the signal should be actually delivered, otherwise
636  * it should be dropped.
637  */
638 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
639 {
640         struct signal_struct *signal = p->signal;
641         struct task_struct *t;
642
643         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
644                 /*
645                  * The process is in the middle of dying, nothing to do.
646                  */
647         } else if (sig_kernel_stop(sig)) {
648                 /*
649                  * This is a stop signal.  Remove SIGCONT from all queues.
650                  */
651                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
652                 t = p;
653                 do {
654                         rm_from_queue(sigmask(SIGCONT), &t->pending);
655                 } while_each_thread(p, t);
656         } else if (sig == SIGCONT) {
657                 unsigned int why;
658                 /*
659                  * Remove all stop signals from all queues,
660                  * and wake all threads.
661                  */
662                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
663                 t = p;
664                 do {
665                         unsigned int state;
666                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
667                         /*
668                          * If there is a handler for SIGCONT, we must make
669                          * sure that no thread returns to user mode before
670                          * we post the signal, in case it was the only
671                          * thread eligible to run the signal handler--then
672                          * it must not do anything between resuming and
673                          * running the handler.  With the TIF_SIGPENDING
674                          * flag set, the thread will pause and acquire the
675                          * siglock that we hold now and until we've queued
676                          * the pending signal.
677                          *
678                          * Wake up the stopped thread _after_ setting
679                          * TIF_SIGPENDING
680                          */
681                         state = __TASK_STOPPED;
682                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
683                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
684                                 state |= TASK_INTERRUPTIBLE;
685                         }
686                         wake_up_state(t, state);
687                 } while_each_thread(p, t);
688
689                 /*
690                  * Notify the parent with CLD_CONTINUED if we were stopped.
691                  *
692                  * If we were in the middle of a group stop, we pretend it
693                  * was already finished, and then continued. Since SIGCHLD
694                  * doesn't queue we report only CLD_STOPPED, as if the next
695                  * CLD_CONTINUED was dropped.
696                  */
697                 why = 0;
698                 if (signal->flags & SIGNAL_STOP_STOPPED)
699                         why |= SIGNAL_CLD_CONTINUED;
700                 else if (signal->group_stop_count)
701                         why |= SIGNAL_CLD_STOPPED;
702
703                 if (why) {
704                         /*
705                          * The first thread which returns from finish_stop()
706                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
707                          * notify its parent. See get_signal_to_deliver().
708                          */
709                         signal->flags = why | SIGNAL_STOP_CONTINUED;
710                         signal->group_stop_count = 0;
711                         signal->group_exit_code = 0;
712                 } else {
713                         /*
714                          * We are not stopped, but there could be a stop
715                          * signal in the middle of being processed after
716                          * being removed from the queue.  Clear that too.
717                          */
718                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
719                 }
720         }
721
722         return !sig_ignored(p, sig, from_ancestor_ns);
723 }
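/*
 * Illustrative userspace sketch (not kernel code): the side effect coded in
 * prepare_signal() above is visible from userspace -- generating SIGCONT
 * removes already-pending stop signals even while they are blocked.
 */
#include <signal.h>
#include <stdio.h>

static void show(const char *when)
{
        sigset_t pend;

        sigpending(&pend);
        printf("%s: SIGTTIN pending=%d\n", when, sigismember(&pend, SIGTTIN));
}

int main(void)
{
        sigset_t block;

        sigemptyset(&block);
        sigaddset(&block, SIGTTIN);
        sigaddset(&block, SIGCONT);
        sigprocmask(SIG_BLOCK, &block, NULL);

        raise(SIGTTIN);
        show("after raise(SIGTTIN)");   /* pending=1 */

        raise(SIGCONT);                 /* prepare_signal() flushes stop signals */
        show("after raise(SIGCONT)");   /* pending=0 */
        return 0;
}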
724
725 /*
726  * Test if P wants to take SIG.  After we've checked all threads with this,
727  * it's equivalent to finding no threads not blocking SIG.  Any threads not
728  * blocking SIG were ruled out because they are not running and already
729  * have pending signals.  Such threads will dequeue from the shared queue
730  * as soon as they're available, so putting the signal on the shared queue
731  * will be equivalent to sending it to one such thread.
732  */
733 static inline int wants_signal(int sig, struct task_struct *p)
734 {
735         if (sigismember(&p->blocked, sig))
736                 return 0;
737         if (p->flags & PF_EXITING)
738                 return 0;
739         if (sig == SIGKILL)
740                 return 1;
741         if (task_is_stopped_or_traced(p))
742                 return 0;
743         return task_curr(p) || !signal_pending(p);
744 }
745
746 static void complete_signal(int sig, struct task_struct *p, int group)
747 {
748         struct signal_struct *signal = p->signal;
749         struct task_struct *t;
750
751         /*
752          * Now find a thread we can wake up to take the signal off the queue.
753          *
754          * If the main thread wants the signal, it gets first crack.
755          * Probably the least surprising to the average bear.
756          */
757         if (wants_signal(sig, p))
758                 t = p;
759         else if (!group || thread_group_empty(p))
760                 /*
761                  * There is just one thread and it does not need to be woken.
762                  * It will dequeue unblocked signals before it runs again.
763                  */
764                 return;
765         else {
766                 /*
767                  * Otherwise try to find a suitable thread.
768                  */
769                 t = signal->curr_target;
770                 while (!wants_signal(sig, t)) {
771                         t = next_thread(t);
772                         if (t == signal->curr_target)
773                                 /*
774                                  * No thread needs to be woken.
775                                  * Any eligible threads will see
776                                  * the signal in the queue soon.
777                                  */
778                                 return;
779                 }
780                 signal->curr_target = t;
781         }
782
783         /*
784          * Found a killable thread.  If the signal will be fatal,
785          * then start taking the whole group down immediately.
786          */
787         if (sig_fatal(p, sig) &&
788             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
789             !sigismember(&t->real_blocked, sig) &&
790             (sig == SIGKILL ||
791              !tracehook_consider_fatal_signal(t, sig))) {
792                 /*
793                  * This signal will be fatal to the whole group.
794                  */
795                 if (!sig_kernel_coredump(sig)) {
796                         /*
797                          * Start a group exit and wake everybody up.
798                          * This way we don't have other threads
799                          * running and doing things after a slower
800                          * thread has the fatal signal pending.
801                          */
802                         signal->flags = SIGNAL_GROUP_EXIT;
803                         signal->group_exit_code = sig;
804                         signal->group_stop_count = 0;
805                         t = p;
806                         do {
807                                 sigaddset(&t->pending.signal, SIGKILL);
808                                 signal_wake_up(t, 1);
809                         } while_each_thread(p, t);
810                         return;
811                 }
812         }
813
814         /*
815          * The signal is already in the shared-pending queue.
816          * Tell the chosen thread to wake up and dequeue it.
817          */
818         signal_wake_up(t, sig == SIGKILL);
819         return;
820 }
821
822 static inline int legacy_queue(struct sigpending *signals, int sig)
823 {
824         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
825 }
826
827 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
828                         int group, int from_ancestor_ns)
829 {
830         struct sigpending *pending;
831         struct sigqueue *q;
832
833         trace_sched_signal_send(sig, t);
834
835         assert_spin_locked(&t->sighand->siglock);
836
837         if (!prepare_signal(sig, t, from_ancestor_ns))
838                 return 0;
839
840         pending = group ? &t->signal->shared_pending : &t->pending;
841         /*
842          * Short-circuit ignored signals and support queuing
843          * exactly one non-rt signal, so that we can get more
844          * detailed information about the cause of the signal.
845          */
846         if (legacy_queue(pending, sig))
847                 return 0;
848         /*
849          * fast-pathed signals for kernel-internal things like SIGSTOP
850          * or SIGKILL.
851          */
852         if (info == SEND_SIG_FORCED)
853                 goto out_set;
854
855         /* Real-time signals must be queued if sent by sigqueue, or
856            some other real-time mechanism.  It is implementation
857            defined whether kill() does so.  We attempt to do so, on
858            the principle of least surprise, but since kill is not
859            allowed to fail with EAGAIN when low on memory we just
860            make sure at least one signal gets delivered and don't
861            pass on the info struct.  */
862
863         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
864                                              (is_si_special(info) ||
865                                               info->si_code >= 0)));
866         if (q) {
867                 list_add_tail(&q->list, &pending->list);
868                 switch ((unsigned long) info) {
869                 case (unsigned long) SEND_SIG_NOINFO:
870                         q->info.si_signo = sig;
871                         q->info.si_errno = 0;
872                         q->info.si_code = SI_USER;
873                         q->info.si_pid = task_tgid_nr_ns(current,
874                                                         task_active_pid_ns(t));
875                         q->info.si_uid = current_uid();
876                         break;
877                 case (unsigned long) SEND_SIG_PRIV:
878                         q->info.si_signo = sig;
879                         q->info.si_errno = 0;
880                         q->info.si_code = SI_KERNEL;
881                         q->info.si_pid = 0;
882                         q->info.si_uid = 0;
883                         break;
884                 default:
885                         copy_siginfo(&q->info, info);
886                         break;
887                 }
888         } else if (!is_si_special(info)) {
889                 if (sig >= SIGRTMIN && info->si_code != SI_USER)
890                 /*
891                  * Queue overflow, abort.  We may abort if the signal was rt
892                  * and sent by user using something other than kill().
893                  */
894                         return -EAGAIN;
895         }
896
897 out_set:
898         signalfd_notify(t, sig);
899         sigaddset(&pending->signal, sig);
900         complete_signal(sig, t, group);
901         return 0;
902 }
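/*
 * Illustrative userspace sketch (not kernel code): the legacy_queue() check
 * used by __send_signal() above is why a classic signal sent three times
 * while blocked is delivered once, whereas a real-time signal is queued for
 * each send.
 */
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int drain(int sig)
{
        struct timespec zero = { 0, 0 };
        sigset_t set;
        int n = 0;

        sigemptyset(&set);
        sigaddset(&set, sig);
        while (sigtimedwait(&set, NULL, &zero) == sig)
                n++;
        return n;
}

int main(void)
{
        sigset_t set;
        int i;

        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
        sigaddset(&set, SIGRTMIN);
        sigprocmask(SIG_BLOCK, &set, NULL);

        for (i = 0; i < 3; i++) {
                kill(getpid(), SIGUSR1);
                kill(getpid(), SIGRTMIN);
        }

        printf("SIGUSR1 delivered %d time(s)\n", drain(SIGUSR1));      /* 1 */
        printf("SIGRTMIN delivered %d time(s)\n", drain(SIGRTMIN));    /* 3 */
        return 0;
}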
903
904 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
905                         int group)
906 {
907         int from_ancestor_ns = 0;
908
909 #ifdef CONFIG_PID_NS
910         if (!is_si_special(info) && SI_FROMUSER(info) &&
911                         task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
912                 from_ancestor_ns = 1;
913 #endif
914
915         return __send_signal(sig, info, t, group, from_ancestor_ns);
916 }
917
918 int print_fatal_signals;
919
920 static void print_fatal_signal(struct pt_regs *regs, int signr)
921 {
922         printk("%s/%d: potentially unexpected fatal signal %d.\n",
923                 current->comm, task_pid_nr(current), signr);
924
925 #if defined(__i386__) && !defined(__arch_um__)
926         printk("code at %08lx: ", regs->ip);
927         {
928                 int i;
929                 for (i = 0; i < 16; i++) {
930                         unsigned char insn;
931
932                         __get_user(insn, (unsigned char *)(regs->ip + i));
933                         printk("%02x ", insn);
934                 }
935         }
936 #endif
937         printk("\n");
938         preempt_disable();
939         show_regs(regs);
940         preempt_enable();
941 }
942
943 static int __init setup_print_fatal_signals(char *str)
944 {
945         get_option (&str, &print_fatal_signals);
946
947         return 1;
948 }
949
950 __setup("print-fatal-signals=", setup_print_fatal_signals);
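/*
 * Example usage (documentation only): the report above can be enabled with
 * the boot parameter "print-fatal-signals=1" or, assuming the matching sysctl
 * is present on the running kernel, at runtime:
 *
 *   echo 1 > /proc/sys/kernel/print-fatal-signals
 */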
951
952 int
953 __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
954 {
955         return send_signal(sig, info, p, 1);
956 }
957
958 static int
959 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
960 {
961         return send_signal(sig, info, t, 0);
962 }
963
964 /*
965  * Force a signal that the process can't ignore: if necessary
966  * we unblock the signal and change any SIG_IGN to SIG_DFL.
967  *
968  * Note: If we unblock the signal, we always reset it to SIG_DFL,
969  * since we do not want to have a signal handler that was blocked
970  * be invoked when user space had explicitly blocked it.
971  *
972  * We don't want to have recursive SIGSEGV's etc, for example,
973  * that is why we also clear SIGNAL_UNKILLABLE.
974  */
975 int
976 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
977 {
978         unsigned long int flags;
979         int ret, blocked, ignored;
980         struct k_sigaction *action;
981
982         spin_lock_irqsave(&t->sighand->siglock, flags);
983         action = &t->sighand->action[sig-1];
984         ignored = action->sa.sa_handler == SIG_IGN;
985         blocked = sigismember(&t->blocked, sig);
986         if (blocked || ignored) {
987                 action->sa.sa_handler = SIG_DFL;
988                 if (blocked) {
989                         sigdelset(&t->blocked, sig);
990                         recalc_sigpending_and_wake(t);
991                 }
992         }
993         if (action->sa.sa_handler == SIG_DFL)
994                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
995         ret = specific_send_sig_info(sig, info, t);
996         spin_unlock_irqrestore(&t->sighand->siglock, flags);
997
998         return ret;
999 }
1000
1001 void
1002 force_sig_specific(int sig, struct task_struct *t)
1003 {
1004         force_sig_info(sig, SEND_SIG_FORCED, t);
1005 }
1006
1007 /*
1008  * Nuke all other threads in the group.
1009  */
1010 void zap_other_threads(struct task_struct *p)
1011 {
1012         struct task_struct *t;
1013
1014         p->signal->group_stop_count = 0;
1015
1016         for (t = next_thread(p); t != p; t = next_thread(t)) {
1017                 /*
1018                  * Don't bother with already dead threads
1019                  */
1020                 if (t->exit_state)
1021                         continue;
1022
1023                 /* SIGKILL will be handled before any pending SIGSTOP */
1024                 sigaddset(&t->pending.signal, SIGKILL);
1025                 signal_wake_up(t, 1);
1026         }
1027 }
1028
1029 int __fatal_signal_pending(struct task_struct *tsk)
1030 {
1031         return sigismember(&tsk->pending.signal, SIGKILL);
1032 }
1033 EXPORT_SYMBOL(__fatal_signal_pending);
1034
1035 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
1036 {
1037         struct sighand_struct *sighand;
1038
1039         rcu_read_lock();
1040         for (;;) {
1041                 sighand = rcu_dereference(tsk->sighand);
1042                 if (unlikely(sighand == NULL))
1043                         break;
1044
1045                 spin_lock_irqsave(&sighand->siglock, *flags);
1046                 if (likely(sighand == tsk->sighand))
1047                         break;
1048                 spin_unlock_irqrestore(&sighand->siglock, *flags);
1049         }
1050         rcu_read_unlock();
1051
1052         return sighand;
1053 }
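/*
 * Illustrative sketch (not part of this file): the usual calling pattern for
 * the helper above.  lock_task_sighand() pins ->sighand against release by
 * __exit_signal(), so a NULL return means the task is already dead.
 * "examine_task" is a hypothetical caller.
 */
static int examine_task(struct task_struct *p)
{
        unsigned long flags;
        int ret = -ESRCH;

        if (lock_task_sighand(p, &flags)) {
                /* ->sighand and ->signal are stable until we unlock. */
                ret = 0;
                unlock_task_sighand(p, &flags);
        }
        return ret;
}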
1054
1055 /*
1056  * send signal info to all the members of a group
1057  * - the caller must hold the RCU read lock at least
1058  */
1059 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1060 {
1061         unsigned long flags;
1062         int ret;
1063
1064         ret = check_kill_permission(sig, info, p);
1065
1066         if (!ret && sig) {
1067                 ret = -ESRCH;
1068                 if (lock_task_sighand(p, &flags)) {
1069                         ret = __group_send_sig_info(sig, info, p);
1070                         unlock_task_sighand(p, &flags);
1071                 }
1072         }
1073
1074         return ret;
1075 }
1076
1077 /*
1078  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1079  * control characters do (^C, ^Z etc)
1080  * - the caller must hold at least a readlock on tasklist_lock
1081  */
1082 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1083 {
1084         struct task_struct *p = NULL;
1085         int retval, success;
1086
1087         success = 0;
1088         retval = -ESRCH;
1089         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1090                 int err = group_send_sig_info(sig, info, p);
1091                 success |= !err;
1092                 retval = err;
1093         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1094         return success ? 0 : retval;
1095 }
1096
1097 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1098 {
1099         int error = -ESRCH;
1100         struct task_struct *p;
1101
1102         rcu_read_lock();
1103 retry:
1104         p = pid_task(pid, PIDTYPE_PID);
1105         if (p) {
1106                 error = group_send_sig_info(sig, info, p);
1107                 if (unlikely(error == -ESRCH))
1108                         /*
1109                          * The task was unhashed in between, try again.
1110                          * If it is dead, pid_task() will return NULL,
1111                          * if we race with de_thread() it will find the
1112                          * new leader.
1113                          */
1114                         goto retry;
1115         }
1116         rcu_read_unlock();
1117
1118         return error;
1119 }
1120
1121 int
1122 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1123 {
1124         int error;
1125         rcu_read_lock();
1126         error = kill_pid_info(sig, info, find_vpid(pid));
1127         rcu_read_unlock();
1128         return error;
1129 }
1130
1131 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
1132 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1133                       uid_t uid, uid_t euid, u32 secid)
1134 {
1135         int ret = -EINVAL;
1136         struct task_struct *p;
1137         const struct cred *pcred;
1138
1139         if (!valid_signal(sig))
1140                 return ret;
1141
1142         read_lock(&tasklist_lock);
1143         p = pid_task(pid, PIDTYPE_PID);
1144         if (!p) {
1145                 ret = -ESRCH;
1146                 goto out_unlock;
1147         }
1148         pcred = __task_cred(p);
1149         if ((info == SEND_SIG_NOINFO ||
1150              (!is_si_special(info) && SI_FROMUSER(info))) &&
1151             euid != pcred->suid && euid != pcred->uid &&
1152             uid  != pcred->suid && uid  != pcred->uid) {
1153                 ret = -EPERM;
1154                 goto out_unlock;
1155         }
1156         ret = security_task_kill(p, info, sig, secid);
1157         if (ret)
1158                 goto out_unlock;
1159         if (sig && p->sighand) {
1160                 unsigned long flags;
1161                 spin_lock_irqsave(&p->sighand->siglock, flags);
1162                 ret = __send_signal(sig, info, p, 1, 0);
1163                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1164         }
1165 out_unlock:
1166         read_unlock(&tasklist_lock);
1167         return ret;
1168 }
1169 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1170
1171 /*
1172  * kill_something_info() interprets pid in interesting ways just like kill(2).
1173  *
1174  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1175  * is probably wrong.  Should make it like BSD or SYSV.
1176  */
1177
1178 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1179 {
1180         int ret;
1181
1182         if (pid > 0) {
1183                 rcu_read_lock();
1184                 ret = kill_pid_info(sig, info, find_vpid(pid));
1185                 rcu_read_unlock();
1186                 return ret;
1187         }
1188
1189         read_lock(&tasklist_lock);
1190         if (pid != -1) {
1191                 ret = __kill_pgrp_info(sig, info,
1192                                 pid ? find_vpid(-pid) : task_pgrp(current));
1193         } else {
1194                 int retval = 0, count = 0;
1195                 struct task_struct * p;
1196
1197                 for_each_process(p) {
1198                         if (task_pid_vnr(p) > 1 &&
1199                                         !same_thread_group(p, current)) {
1200                                 int err = group_send_sig_info(sig, info, p);
1201                                 ++count;
1202                                 if (err != -EPERM)
1203                                         retval = err;
1204                         }
1205                 }
1206                 ret = count ? retval : -ESRCH;
1207         }
1208         read_unlock(&tasklist_lock);
1209
1210         return ret;
1211 }
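/*
 * Illustrative userspace sketch (not kernel code): the four pid cases handled
 * by kill_something_info() above, as seen through kill(2).  Signal 0 is used
 * so the calls only exercise the pid fan-out and permission checks without
 * delivering anything; "pid" and "pgrp" are hypothetical values.
 */
#include <signal.h>
#include <sys/types.h>

static void kill_pid_cases(pid_t pid, pid_t pgrp)
{
        kill(pid, 0);    /* pid > 0:   just that process                     */
        kill(0, 0);      /* pid == 0:  every process in our own group        */
        kill(-pgrp, 0);  /* pid < -1:  every process in process group "pgrp" */
        kill(-1, 0);     /* pid == -1: everything we may signal, except init
                          *            and our own thread group              */
}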
1212
1213 /*
1214  * These are for backward compatibility with the rest of the kernel source.
1215  */
1216
1217 /*
1218  * The caller must ensure the task can't exit.
1219  */
1220 int
1221 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1222 {
1223         int ret;
1224         unsigned long flags;
1225
1226         /*
1227          * Make sure legacy kernel users don't send in bad values
1228          * (normal paths check this in check_kill_permission).
1229          */
1230         if (!valid_signal(sig))
1231                 return -EINVAL;
1232
1233         spin_lock_irqsave(&p->sighand->siglock, flags);
1234         ret = specific_send_sig_info(sig, info, p);
1235         spin_unlock_irqrestore(&p->sighand->siglock, flags);
1236         return ret;
1237 }
1238
1239 #define __si_special(priv) \
1240         ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1241
1242 int
1243 send_sig(int sig, struct task_struct *p, int priv)
1244 {
1245         return send_sig_info(sig, __si_special(priv), p);
1246 }
1247
1248 void
1249 force_sig(int sig, struct task_struct *p)
1250 {
1251         force_sig_info(sig, SEND_SIG_PRIV, p);
1252 }
1253
1254 /*
1255  * When things go south during signal handling, we
1256  * will force a SIGSEGV. And if the signal that caused
1257  * the problem was already a SIGSEGV, we'll want to
1258  * make sure we don't even try to deliver the signal..
1259  */
1260 int
1261 force_sigsegv(int sig, struct task_struct *p)
1262 {
1263         if (sig == SIGSEGV) {
1264                 unsigned long flags;
1265                 spin_lock_irqsave(&p->sighand->siglock, flags);
1266                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1267                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1268         }
1269         force_sig(SIGSEGV, p);
1270         return 0;
1271 }
1272
1273 int kill_pgrp(struct pid *pid, int sig, int priv)
1274 {
1275         int ret;
1276
1277         read_lock(&tasklist_lock);
1278         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1279         read_unlock(&tasklist_lock);
1280
1281         return ret;
1282 }
1283 EXPORT_SYMBOL(kill_pgrp);
1284
1285 int kill_pid(struct pid *pid, int sig, int priv)
1286 {
1287         return kill_pid_info(sig, __si_special(priv), pid);
1288 }
1289 EXPORT_SYMBOL(kill_pid);
1290
1291 /*
1292  * These functions support sending signals using preallocated sigqueue
1293  * structures.  This is needed "because realtime applications cannot
1294  * afford to lose notifications of asynchronous events, like timer
1295  * expirations or I/O completions".  In the case of Posix Timers 
1296  * we allocate the sigqueue structure from the timer_create.  If this
1297  * allocation fails we are able to report the failure to the application
1298  * with an EAGAIN error.
1299  */
1300  
1301 struct sigqueue *sigqueue_alloc(void)
1302 {
1303         struct sigqueue *q;
1304
1305         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1306                 q->flags |= SIGQUEUE_PREALLOC;
1307         return(q);
1308 }
1309
1310 void sigqueue_free(struct sigqueue *q)
1311 {
1312         unsigned long flags;
1313         spinlock_t *lock = &current->sighand->siglock;
1314
1315         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1316         /*
1317          * We must hold ->siglock while testing q->list
1318          * to serialize with collect_signal() or with
1319          * __exit_signal()->flush_sigqueue().
1320          */
1321         spin_lock_irqsave(lock, flags);
1322         q->flags &= ~SIGQUEUE_PREALLOC;
1323         /*
1324          * If it is queued it will be freed when dequeued,
1325          * like the "regular" sigqueue.
1326          */
1327         if (!list_empty(&q->list))
1328                 q = NULL;
1329         spin_unlock_irqrestore(lock, flags);
1330
1331         if (q)
1332                 __sigqueue_free(q);
1333 }
1334
1335 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1336 {
1337         int sig = q->info.si_signo;
1338         struct sigpending *pending;
1339         unsigned long flags;
1340         int ret;
1341
1342         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1343
1344         ret = -1;
1345         if (!likely(lock_task_sighand(t, &flags)))
1346                 goto ret;
1347
1348         ret = 1; /* the signal is ignored */
1349         if (!prepare_signal(sig, t, 0))
1350                 goto out;
1351
1352         ret = 0;
1353         if (unlikely(!list_empty(&q->list))) {
1354                 /*
1355          * If an SI_TIMER entry is already queued, just increment
1356                  * the overrun count.
1357                  */
1358                 BUG_ON(q->info.si_code != SI_TIMER);
1359                 q->info.si_overrun++;
1360                 goto out;
1361         }
1362         q->info.si_overrun = 0;
1363
1364         signalfd_notify(t, sig);
1365         pending = group ? &t->signal->shared_pending : &t->pending;
1366         list_add_tail(&q->list, &pending->list);
1367         sigaddset(&pending->signal, sig);
1368         complete_signal(sig, t, group);
1369 out:
1370         unlock_task_sighand(t, &flags);
1371 ret:
1372         return ret;
1373 }
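/*
 * Illustrative userspace sketch (not kernel code): POSIX timers are the main
 * user of the preallocated-sigqueue path above -- timer_create() allocates
 * the entry up front (and can fail with EAGAIN, matching __sigqueue_alloc()
 * failing), send_sigqueue() reuses it on every expiry, and an expiry that
 * finds the entry still queued only bumps the overrun count.  May need -lrt
 * on older glibc.
 */
#include <signal.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct sigevent sev = { 0 };
        struct itimerspec its = { 0 };
        sigset_t set;
        timer_t timerid;

        sigemptyset(&set);
        sigaddset(&set, SIGRTMIN);
        sigprocmask(SIG_BLOCK, &set, NULL);

        sev.sigev_notify = SIGEV_SIGNAL;
        sev.sigev_signo  = SIGRTMIN;
        if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) < 0)
                return 1;               /* preallocation failed (e.g. EAGAIN) */

        its.it_value.tv_nsec    = 1000000;      /* first expiry after 1 ms */
        its.it_interval.tv_nsec = 1000000;      /* then every 1 ms         */
        timer_settime(timerid, 0, &its, NULL);

        sigwaitinfo(&set, NULL);
        printf("timer signal, overrun so far: %d\n", timer_getoverrun(timerid));
        return 0;
}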
1374
1375 /*
1376  * Wake up any threads in the parent blocked in wait* syscalls.
1377  */
1378 static inline void __wake_up_parent(struct task_struct *p,
1379                                     struct task_struct *parent)
1380 {
1381         wake_up_interruptible_sync(&parent->signal->wait_chldexit);
1382 }
1383
1384 /*
1385  * Let a parent know about the death of a child.
1386  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
1387  *
1388  * Returns -1 if our parent ignored us and so we've switched to
1389  * self-reaping, or else @sig.
1390  */
1391 int do_notify_parent(struct task_struct *tsk, int sig)
1392 {
1393         struct siginfo info;
1394         unsigned long flags;
1395         struct sighand_struct *psig;
1396         int ret = sig;
1397
1398         BUG_ON(sig == -1);
1399
1400         /* do_notify_parent_cldstop should have been called instead.  */
1401         BUG_ON(task_is_stopped_or_traced(tsk));
1402
1403         BUG_ON(!tsk->ptrace &&
1404                (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1405
1406         info.si_signo = sig;
1407         info.si_errno = 0;
1408         /*
1409          * we are under tasklist_lock here so our parent is tied to
1410          * us and cannot exit and release its namespace.
1411          *
1412          * The only thing it can do is switch its nsproxy with sys_unshare,
1413          * but unsharing pid namespaces is not allowed, so we'll always
1414          * see the relevant namespace.
1415          *
1416          * write_lock() currently calls preempt_disable(), which is the
1417          * same as rcu_read_lock(), but according to Oleg it is not
1418          * correct to rely on this.
1419          */
1420         rcu_read_lock();
1421         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1422         info.si_uid = __task_cred(tsk)->uid;
1423         rcu_read_unlock();
1424
1425         info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1426                                 tsk->signal->utime));
1427         info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1428                                 tsk->signal->stime));
1429
1430         info.si_status = tsk->exit_code & 0x7f;
1431         if (tsk->exit_code & 0x80)
1432                 info.si_code = CLD_DUMPED;
1433         else if (tsk->exit_code & 0x7f)
1434                 info.si_code = CLD_KILLED;
1435         else {
1436                 info.si_code = CLD_EXITED;
1437                 info.si_status = tsk->exit_code >> 8;
1438         }
1439
1440         psig = tsk->parent->sighand;
1441         spin_lock_irqsave(&psig->siglock, flags);
1442         if (!tsk->ptrace && sig == SIGCHLD &&
1443             (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1444              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1445                 /*
1446                  * We are exiting and our parent doesn't care.  POSIX.1
1447                  * defines special semantics for setting SIGCHLD to SIG_IGN
1448                  * or setting the SA_NOCLDWAIT flag: we should be reaped
1449                  * automatically and not left for our parent's wait4 call.
1450                  * Rather than having the parent do it as a magic kind of
1451                  * signal handler, we just set this to tell do_exit that we
1452                  * can be cleaned up without becoming a zombie.  Note that
1453                  * we still call __wake_up_parent in this case, because a
1454                  * blocked sys_wait4 might now return -ECHILD.
1455                  *
1456                  * Whether we send SIGCHLD or not for SA_NOCLDWAIT
1457                  * is implementation-defined: we do (if you don't want
1458                  * it, just use SIG_IGN instead).
1459                  */
1460                 ret = tsk->exit_signal = -1;
1461                 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1462                         sig = -1;
1463         }
1464         if (valid_signal(sig) && sig > 0)
1465                 __group_send_sig_info(sig, &info, tsk->parent);
1466         __wake_up_parent(tsk, tsk->parent);
1467         spin_unlock_irqrestore(&psig->siglock, flags);
1468
1469         return ret;
1470 }
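/*
 * Illustrative userspace sketch (not kernel code): the SIG_IGN / SA_NOCLDWAIT
 * branch in do_notify_parent() above is the kernel side of the POSIX "don't
 * turn children into zombies" rule.  With SIGCHLD ignored, the child is
 * reaped automatically and wait() reports ECHILD.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        signal(SIGCHLD, SIG_IGN);       /* or sigaction() with SA_NOCLDWAIT */

        if (fork() == 0)
                _exit(0);               /* child: exit immediately */

        sleep(1);                       /* crude: let the child exit */
        if (wait(NULL) < 0 && errno == ECHILD)
                printf("child was auto-reaped, wait() sees no children\n");
        return 0;
}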
1471
1472 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1473 {
1474         struct siginfo info;
1475         unsigned long flags;
1476         struct task_struct *parent;
1477         struct sighand_struct *sighand;
1478
1479         if (tsk->ptrace & PT_PTRACED)
1480                 parent = tsk->parent;
1481         else {
1482                 tsk = tsk->group_leader;
1483                 parent = tsk->real_parent;
1484         }
1485
1486         info.si_signo = SIGCHLD;
1487         info.si_errno = 0;
1488         /*
1489          * see comment in do_notify_parent() about the following 3 lines
1490          */
1491         rcu_read_lock();
1492         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1493         info.si_uid = __task_cred(tsk)->uid;
1494         rcu_read_unlock();
1495
1496         info.si_utime = cputime_to_clock_t(tsk->utime);
1497         info.si_stime = cputime_to_clock_t(tsk->stime);
1498
1499         info.si_code = why;
1500         switch (why) {
1501         case CLD_CONTINUED:
1502                 info.si_status = SIGCONT;
1503                 break;
1504         case CLD_STOPPED:
1505                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1506                 break;
1507         case CLD_TRAPPED:
1508                 info.si_status = tsk->exit_code & 0x7f;
1509                 break;
1510         default:
1511                 BUG();
1512         }
1513
1514         sighand = parent->sighand;
1515         spin_lock_irqsave(&sighand->siglock, flags);
1516         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1517             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1518                 __group_send_sig_info(SIGCHLD, &info, parent);
1519         /*
1520          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1521          */
1522         __wake_up_parent(tsk, parent);
1523         spin_unlock_irqrestore(&sighand->siglock, flags);
1524 }
1525
1526 static inline int may_ptrace_stop(void)
1527 {
1528         if (!likely(current->ptrace & PT_PTRACED))
1529                 return 0;
1530         /*
1531          * Are we in the middle of do_coredump?
1532          * If so, and our tracer is also part of the coredump, stopping
1533          * is a deadlock situation and pointless because our tracer
1534          * is dead, so don't allow us to stop.
1535          * If SIGKILL was already sent before the caller unlocked
1536          * ->siglock we must see ->core_state != NULL. Otherwise it
1537          * is safe to enter schedule().
1538          */
1539         if (unlikely(current->mm->core_state) &&
1540             unlikely(current->mm == current->parent->mm))
1541                 return 0;
1542
1543         return 1;
1544 }
1545
1546 /*
1547  * Return nonzero if there is a SIGKILL that should be waking us up.
1548  * Called with the siglock held.
1549  */
1550 static int sigkill_pending(struct task_struct *tsk)
1551 {
1552         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1553                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1554 }
1555
1556 /*
1557  * This must be called with current->sighand->siglock held.
1558  *
1559  * This should be the path for all ptrace stops.
1560  * We always set current->last_siginfo while stopped here.
1561  * That makes it a way to test a stopped process for
1562  * being ptrace-stopped vs being job-control-stopped.
1563  *
1564  * If we actually decide not to stop at all because the tracer
1565  * is gone, we keep current->exit_code unless clear_code.
1566  */
1567 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1568 {
1569         if (arch_ptrace_stop_needed(exit_code, info)) {
1570                 /*
1571                  * The arch code has something special to do before a
1572                  * ptrace stop.  This is allowed to block, e.g. for faults
1573                  * on user stack pages.  We can't keep the siglock while
1574                  * calling arch_ptrace_stop, so we must release it now.
1575                  * To preserve proper semantics, we must do this before
1576                  * any signal bookkeeping like checking group_stop_count.
1577                  * Meanwhile, a SIGKILL could come in before we retake the
1578                  * siglock.  That must prevent us from sleeping in TASK_TRACED.
1579                  * So after regaining the lock, we must check for SIGKILL.
1580                  */
1581                 spin_unlock_irq(&current->sighand->siglock);
1582                 arch_ptrace_stop(exit_code, info);
1583                 spin_lock_irq(&current->sighand->siglock);
1584                 if (sigkill_pending(current))
1585                         return;
1586         }
1587
1588         /*
1589          * If there is a group stop in progress,
1590          * we must participate in the bookkeeping.
1591          */
1592         if (current->signal->group_stop_count > 0)
1593                 --current->signal->group_stop_count;
1594
1595         current->last_siginfo = info;
1596         current->exit_code = exit_code;
1597
1598         /* Let the debugger run.  */
1599         __set_current_state(TASK_TRACED);
1600         spin_unlock_irq(&current->sighand->siglock);
1601         read_lock(&tasklist_lock);
1602         if (may_ptrace_stop()) {
1603                 do_notify_parent_cldstop(current, CLD_TRAPPED);
1604                 /*
1605                  * Don't want to allow preemption here, because
1606                  * sys_ptrace() needs this task to be inactive.
1607                  *
1608                  * XXX: implement read_unlock_no_resched().
1609                  */
1610                 preempt_disable();
1611                 read_unlock(&tasklist_lock);
1612                 preempt_enable_no_resched();
1613                 schedule();
1614         } else {
1615                 /*
1616                  * By the time we got the lock, our tracer went away.
1617                  * Don't drop the lock yet, another tracer may come.
1618                  */
1619                 __set_current_state(TASK_RUNNING);
1620                 if (clear_code)
1621                         current->exit_code = 0;
1622                 read_unlock(&tasklist_lock);
1623         }
1624
1625         /*
1626          * While in TASK_TRACED, we were considered "frozen enough".
1627          * Now that we woke up, it's crucial if we're supposed to be
1628          * frozen that we freeze now before running anything substantial.
1629          */
1630         try_to_freeze();
1631
1632         /*
1633          * We are back.  Now reacquire the siglock before touching
1634          * last_siginfo, so that we are sure to have synchronized with
1635          * any signal-sending on another CPU that wants to examine it.
1636          */
1637         spin_lock_irq(&current->sighand->siglock);
1638         current->last_siginfo = NULL;
1639
1640         /*
1641          * Queued signals ignored us while we were stopped for tracing.
1642          * So check for any that we should take before resuming user mode.
1643          * This sets TIF_SIGPENDING, but never clears it.
1644          */
1645         recalc_sigpending_tsk(current);
1646 }
1647
1648 void ptrace_notify(int exit_code)
1649 {
1650         siginfo_t info;
1651
1652         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1653
1654         memset(&info, 0, sizeof info);
1655         info.si_signo = SIGTRAP;
1656         info.si_code = exit_code;
1657         info.si_pid = task_pid_vnr(current);
1658         info.si_uid = current_uid();
1659
1660         /* Let the debugger run.  */
1661         spin_lock_irq(&current->sighand->siglock);
1662         ptrace_stop(exit_code, 1, &info);
1663         spin_unlock_irq(&current->sighand->siglock);
1664 }
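/*
 * Illustrative sketch (editor's addition): a stop entered via
 * ptrace_stop()/ptrace_notify() is what a tracer observes as a stopped
 * child.  A minimal user-space tracer, assuming <sys/ptrace.h> and
 * <sys/wait.h>, might look like:
 *
 *	int status, sig;
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
 *	waitpid(pid, &status, 0);          // tracee has entered TASK_TRACED
 *	if (WIFSTOPPED(status))
 *		sig = WSTOPSIG(status);    // signal that caused the stop
 *	ptrace(PTRACE_CONT, pid, NULL, 0); // let the tracee run again
 */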
1665
1666 static void
1667 finish_stop(int stop_count)
1668 {
1669         /*
1670          * If there are no other threads in the group, or if there is
1671          * a group stop in progress and we are the last to stop,
1672          * report to the parent.  When ptraced, every thread reports itself.
1673          */
1674         if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
1675                 read_lock(&tasklist_lock);
1676                 do_notify_parent_cldstop(current, CLD_STOPPED);
1677                 read_unlock(&tasklist_lock);
1678         }
1679
1680         do {
1681                 schedule();
1682         } while (try_to_freeze());
1683         /*
1684          * Now we don't run again until continued.
1685          */
1686         current->exit_code = 0;
1687 }
1688
1689 /*
1690  * This performs the stopping for SIGSTOP and other stop signals.
1691  * We have to stop all threads in the thread group.
1692  * Returns nonzero if we've actually stopped and released the siglock.
1693  * Returns zero if we didn't stop and still hold the siglock.
1694  */
1695 static int do_signal_stop(int signr)
1696 {
1697         struct signal_struct *sig = current->signal;
1698         int stop_count;
1699
1700         if (sig->group_stop_count > 0) {
1701                 /*
1702                  * There is a group stop in progress.  We don't need to
1703                  * start another one.
1704                  */
1705                 stop_count = --sig->group_stop_count;
1706         } else {
1707                 struct task_struct *t;
1708
1709                 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1710                     unlikely(signal_group_exit(sig)))
1711                         return 0;
1712                 /*
1713                  * There is no group stop already in progress.
1714                  * We must initiate one now.
1715                  */
1716                 sig->group_exit_code = signr;
1717
1718                 stop_count = 0;
1719                 for (t = next_thread(current); t != current; t = next_thread(t))
1720                         /*
1721                          * Setting state to TASK_STOPPED for a group
1722                          * stop is always done with the siglock held,
1723                          * so this check has no races.
1724                          */
1725                         if (!(t->flags & PF_EXITING) &&
1726                             !task_is_stopped_or_traced(t)) {
1727                                 stop_count++;
1728                                 signal_wake_up(t, 0);
1729                         }
1730                 sig->group_stop_count = stop_count;
1731         }
1732
1733         if (stop_count == 0)
1734                 sig->flags = SIGNAL_STOP_STOPPED;
1735         current->exit_code = sig->group_exit_code;
1736         __set_current_state(TASK_STOPPED);
1737
1738         spin_unlock_irq(&current->sighand->siglock);
1739         finish_stop(stop_count);
1740         return 1;
1741 }
1742
1743 static int ptrace_signal(int signr, siginfo_t *info,
1744                          struct pt_regs *regs, void *cookie)
1745 {
1746         if (!(current->ptrace & PT_PTRACED))
1747                 return signr;
1748
1749         ptrace_signal_deliver(regs, cookie);
1750
1751         /* Let the debugger run.  */
1752         ptrace_stop(signr, 0, info);
1753
1754         /* We're back.  Did the debugger cancel the sig?  */
1755         signr = current->exit_code;
1756         if (signr == 0)
1757                 return signr;
1758
1759         current->exit_code = 0;
1760
1761         /* Update the siginfo structure if the signal has
1762            changed.  If the debugger wanted something
1763            specific in the siginfo structure then it should
1764            have updated *info via PTRACE_SETSIGINFO.  */
1765         if (signr != info->si_signo) {
1766                 info->si_signo = signr;
1767                 info->si_errno = 0;
1768                 info->si_code = SI_USER;
1769                 info->si_pid = task_pid_vnr(current->parent);
1770                 info->si_uid = task_uid(current->parent);
1771         }
1772
1773         /* If the (new) signal is now blocked, requeue it.  */
1774         if (sigismember(&current->blocked, signr)) {
1775                 specific_send_sig_info(signr, info, current);
1776                 signr = 0;
1777         }
1778
1779         return signr;
1780 }
1781
1782 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1783                           struct pt_regs *regs, void *cookie)
1784 {
1785         struct sighand_struct *sighand = current->sighand;
1786         struct signal_struct *signal = current->signal;
1787         int signr;
1788
1789 relock:
1790         /*
1791          * We'll jump back here after any time we were stopped in TASK_STOPPED.
1792          * While in TASK_STOPPED, we were considered "frozen enough".
1793          * Now that we woke up, it's crucial if we're supposed to be
1794          * frozen that we freeze now before running anything substantial.
1795          */
1796         try_to_freeze();
1797
1798         spin_lock_irq(&sighand->siglock);
1799         /*
1800          * Every stopped thread goes here after wakeup. Check to see if
1801          * we should notify the parent, prepare_signal(SIGCONT) encodes
1802          * the CLD_ si_code into SIGNAL_CLD_MASK bits.
1803          */
1804         if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1805                 int why = (signal->flags & SIGNAL_STOP_CONTINUED)
1806                                 ? CLD_CONTINUED : CLD_STOPPED;
1807                 signal->flags &= ~SIGNAL_CLD_MASK;
1808                 spin_unlock_irq(&sighand->siglock);
1809
1810                 if (unlikely(!tracehook_notify_jctl(1, why)))
1811                         goto relock;
1812
1813                 read_lock(&tasklist_lock);
1814                 do_notify_parent_cldstop(current->group_leader, why);
1815                 read_unlock(&tasklist_lock);
1816                 goto relock;
1817         }
1818
1819         for (;;) {
1820                 struct k_sigaction *ka;
1821
1822                 if (unlikely(signal->group_stop_count > 0) &&
1823                     do_signal_stop(0))
1824                         goto relock;
1825
1826                 /*
1827                  * Tracing can induce an artificial signal and choose the sigaction.
1828                  * The return value in @signr determines the default action,
1829                  * but @info->si_signo is the signal number we will report.
1830                  */
1831                 signr = tracehook_get_signal(current, regs, info, return_ka);
1832                 if (unlikely(signr < 0))
1833                         goto relock;
1834                 if (unlikely(signr != 0))
1835                         ka = return_ka;
1836                 else {
1837                         signr = dequeue_signal(current, &current->blocked,
1838                                                info);
1839
1840                         if (!signr)
1841                                 break; /* will return 0 */
1842
1843                         if (signr != SIGKILL) {
1844                                 signr = ptrace_signal(signr, info,
1845                                                       regs, cookie);
1846                                 if (!signr)
1847                                         continue;
1848                         }
1849
1850                         ka = &sighand->action[signr-1];
1851                 }
1852
1853                 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
1854                         continue;
1855                 if (ka->sa.sa_handler != SIG_DFL) {
1856                         /* Run the handler.  */
1857                         *return_ka = *ka;
1858
1859                         if (ka->sa.sa_flags & SA_ONESHOT)
1860                                 ka->sa.sa_handler = SIG_DFL;
1861
1862                         break; /* will return non-zero "signr" value */
1863                 }
1864
1865                 /*
1866                  * Now we are doing the default action for this signal.
1867                  */
1868                 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1869                         continue;
1870
1871                 /*
1872                  * Global init gets no signals it doesn't want.
1873                  */
1874                 if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
1875                     !signal_group_exit(signal))
1876                         continue;
1877
1878                 if (sig_kernel_stop(signr)) {
1879                         /*
1880                          * The default action is to stop all threads in
1881                          * the thread group.  The job control signals
1882                          * do nothing in an orphaned pgrp, but SIGSTOP
1883                          * always works.  Note that siglock needs to be
1884                          * dropped during the call to is_orphaned_pgrp()
1885                          * because of lock ordering with tasklist_lock.
1886                          * This allows an intervening SIGCONT to be posted.
1887                          * We need to check for that and bail out if necessary.
1888                          */
1889                         if (signr != SIGSTOP) {
1890                                 spin_unlock_irq(&sighand->siglock);
1891
1892                                 /* signals can be posted during this window */
1893
1894                                 if (is_current_pgrp_orphaned())
1895                                         goto relock;
1896
1897                                 spin_lock_irq(&sighand->siglock);
1898                         }
1899
1900                         if (likely(do_signal_stop(info->si_signo))) {
1901                                 /* It released the siglock.  */
1902                                 goto relock;
1903                         }
1904
1905                         /*
1906                          * We didn't actually stop, due to a race
1907                          * with SIGCONT or something like that.
1908                          */
1909                         continue;
1910                 }
1911
1912                 spin_unlock_irq(&sighand->siglock);
1913
1914                 /*
1915                  * Anything else is fatal, maybe with a core dump.
1916                  */
1917                 current->flags |= PF_SIGNALED;
1918
1919                 if (sig_kernel_coredump(signr)) {
1920                         if (print_fatal_signals)
1921                                 print_fatal_signal(regs, info->si_signo);
1922                         /*
1923                          * If it was able to dump core, this kills all
1924                          * other threads in the group and synchronizes with
1925                          * their demise.  If we lost the race with another
1926                          * thread getting here, it set group_exit_code
1927                          * first and our do_group_exit call below will use
1928                          * that value and ignore the one we pass it.
1929                          */
1930                         do_coredump(info->si_signo, info->si_signo, regs);
1931                 }
1932
1933                 /*
1934                  * Death signals, no core dump.
1935                  */
1936                 do_group_exit(info->si_signo);
1937                 /* NOTREACHED */
1938         }
1939         spin_unlock_irq(&sighand->siglock);
1940         return signr;
1941 }
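/*
 * Illustrative sketch (editor's addition): architecture signal code is the
 * caller of get_signal_to_deliver().  The rough shape of an arch's
 * do_signal() loop (simplified, not any specific architecture; the
 * handle_signal() helper that builds the user-mode frame is hypothetical):
 *
 *	siginfo_t info;
 *	struct k_sigaction ka;
 *	int signr;
 *
 *	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 *	if (signr > 0)
 *		handle_signal(signr, &info, &ka, regs);
 */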
1942
1943 void exit_signals(struct task_struct *tsk)
1944 {
1945         int group_stop = 0;
1946         struct task_struct *t;
1947
1948         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1949                 tsk->flags |= PF_EXITING;
1950                 return;
1951         }
1952
1953         spin_lock_irq(&tsk->sighand->siglock);
1954         /*
1955          * From now this task is not visible for group-wide signals,
1956          * see wants_signal(), do_signal_stop().
1957          */
1958         tsk->flags |= PF_EXITING;
1959         if (!signal_pending(tsk))
1960                 goto out;
1961
1962         /* It could be that __group_complete_signal() chose us to
1963          * notify about a group-wide signal. Another thread should be
1964          * woken now to take the signal since we will not.
1965          */
1966         for (t = tsk; (t = next_thread(t)) != tsk; )
1967                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1968                         recalc_sigpending_and_wake(t);
1969
1970         if (unlikely(tsk->signal->group_stop_count) &&
1971                         !--tsk->signal->group_stop_count) {
1972                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1973                 group_stop = 1;
1974         }
1975 out:
1976         spin_unlock_irq(&tsk->sighand->siglock);
1977
1978         if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
1979                 read_lock(&tasklist_lock);
1980                 do_notify_parent_cldstop(tsk, CLD_STOPPED);
1981                 read_unlock(&tasklist_lock);
1982         }
1983 }
1984
1985 EXPORT_SYMBOL(recalc_sigpending);
1986 EXPORT_SYMBOL_GPL(dequeue_signal);
1987 EXPORT_SYMBOL(flush_signals);
1988 EXPORT_SYMBOL(force_sig);
1989 EXPORT_SYMBOL(send_sig);
1990 EXPORT_SYMBOL(send_sig_info);
1991 EXPORT_SYMBOL(sigprocmask);
1992 EXPORT_SYMBOL(block_all_signals);
1993 EXPORT_SYMBOL(unblock_all_signals);
1994
1995
1996 /*
1997  * System call entry points.
1998  */
1999
2000 SYSCALL_DEFINE0(restart_syscall)
2001 {
2002         struct restart_block *restart = &current_thread_info()->restart_block;
2003         return restart->fn(restart);
2004 }
2005
2006 long do_no_restart_syscall(struct restart_block *param)
2007 {
2008         return -EINTR;
2009 }
2010
2011 /*
2012  * We don't need to get the kernel lock - this is all local to this
2013  * particular thread. (and that's good, because this is _heavily_
2014  * used by various programs)
2015  */
2016
2017 /*
2018  * This is also useful for kernel threads that want to temporarily
2019  * (or permanently) block certain signals.
2020  *
2021  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2022  * interface happily blocks "unblockable" signals like SIGKILL
2023  * and friends.
2024  */
2025 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2026 {
2027         int error;
2028
2029         spin_lock_irq(&current->sighand->siglock);
2030         if (oldset)
2031                 *oldset = current->blocked;
2032
2033         error = 0;
2034         switch (how) {
2035         case SIG_BLOCK:
2036                 sigorsets(&current->blocked, &current->blocked, set);
2037                 break;
2038         case SIG_UNBLOCK:
2039                 signandsets(&current->blocked, &current->blocked, set);
2040                 break;
2041         case SIG_SETMASK:
2042                 current->blocked = *set;
2043                 break;
2044         default:
2045                 error = -EINVAL;
2046         }
2047         recalc_sigpending();
2048         spin_unlock_irq(&current->sighand->siglock);
2049
2050         return error;
2051 }
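/*
 * Illustrative sketch (editor's addition): an in-kernel caller such as a
 * kernel thread could block every signal with this interface.  Unlike the
 * user-space entry points below, nothing strips SIGKILL/SIGSTOP here:
 *
 *	sigset_t all;
 *
 *	sigfillset(&all);
 *	sigprocmask(SIG_BLOCK, &all, NULL);
 */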
2052
2053 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2054                 sigset_t __user *, oset, size_t, sigsetsize)
2055 {
2056         int error = -EINVAL;
2057         sigset_t old_set, new_set;
2058
2059         /* XXX: Don't preclude handling different sized sigset_t's.  */
2060         if (sigsetsize != sizeof(sigset_t))
2061                 goto out;
2062
2063         if (set) {
2064                 error = -EFAULT;
2065                 if (copy_from_user(&new_set, set, sizeof(*set)))
2066                         goto out;
2067                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2068
2069                 error = sigprocmask(how, &new_set, &old_set);
2070                 if (error)
2071                         goto out;
2072                 if (oset)
2073                         goto set_old;
2074         } else if (oset) {
2075                 spin_lock_irq(&current->sighand->siglock);
2076                 old_set = current->blocked;
2077                 spin_unlock_irq(&current->sighand->siglock);
2078
2079         set_old:
2080                 error = -EFAULT;
2081                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2082                         goto out;
2083         }
2084         error = 0;
2085 out:
2086         return error;
2087 }
2088
2089 long do_sigpending(void __user *set, unsigned long sigsetsize)
2090 {
2091         long error = -EINVAL;
2092         sigset_t pending;
2093
2094         if (sigsetsize > sizeof(sigset_t))
2095                 goto out;
2096
2097         spin_lock_irq(&current->sighand->siglock);
2098         sigorsets(&pending, &current->pending.signal,
2099                   &current->signal->shared_pending.signal);
2100         spin_unlock_irq(&current->sighand->siglock);
2101
2102         /* Outside the lock because only this thread touches it.  */
2103         sigandsets(&pending, &current->blocked, &pending);
2104
2105         error = -EFAULT;
2106         if (!copy_to_user(set, &pending, sigsetsize))
2107                 error = 0;
2108
2109 out:
2110         return error;
2111 }
2112
2113 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2114 {
2115         return do_sigpending(set, sigsetsize);
2116 }
2117
2118 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2119
2120 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2121 {
2122         int err;
2123
2124         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2125                 return -EFAULT;
2126         if (from->si_code < 0)
2127                 return __copy_to_user(to, from, sizeof(siginfo_t))
2128                         ? -EFAULT : 0;
2129         /*
2130          * If you change siginfo_t structure, please be sure
2131          * this code is fixed accordingly.
2132          * Please remember to update the signalfd_copyinfo() function
2133          * inside fs/signalfd.c too, in case siginfo_t changes.
2134          * It should never copy any pad contained in the structure
2135          * to avoid security leaks, but must copy the generic
2136          * 3 ints plus the relevant union member.
2137          */
2138         err = __put_user(from->si_signo, &to->si_signo);
2139         err |= __put_user(from->si_errno, &to->si_errno);
2140         err |= __put_user((short)from->si_code, &to->si_code);
2141         switch (from->si_code & __SI_MASK) {
2142         case __SI_KILL:
2143                 err |= __put_user(from->si_pid, &to->si_pid);
2144                 err |= __put_user(from->si_uid, &to->si_uid);
2145                 break;
2146         case __SI_TIMER:
2147                 err |= __put_user(from->si_tid, &to->si_tid);
2148                 err |= __put_user(from->si_overrun, &to->si_overrun);
2149                 err |= __put_user(from->si_ptr, &to->si_ptr);
2150                 break;
2151         case __SI_POLL:
2152                 err |= __put_user(from->si_band, &to->si_band);
2153                 err |= __put_user(from->si_fd, &to->si_fd);
2154                 break;
2155         case __SI_FAULT:
2156                 err |= __put_user(from->si_addr, &to->si_addr);
2157 #ifdef __ARCH_SI_TRAPNO
2158                 err |= __put_user(from->si_trapno, &to->si_trapno);
2159 #endif
2160                 break;
2161         case __SI_CHLD:
2162                 err |= __put_user(from->si_pid, &to->si_pid);
2163                 err |= __put_user(from->si_uid, &to->si_uid);
2164                 err |= __put_user(from->si_status, &to->si_status);
2165                 err |= __put_user(from->si_utime, &to->si_utime);
2166                 err |= __put_user(from->si_stime, &to->si_stime);
2167                 break;
2168         case __SI_RT: /* This is not generated by the kernel as of now. */
2169         case __SI_MESGQ: /* But this is */
2170                 err |= __put_user(from->si_pid, &to->si_pid);
2171                 err |= __put_user(from->si_uid, &to->si_uid);
2172                 err |= __put_user(from->si_ptr, &to->si_ptr);
2173                 break;
2174         default: /* this is just in case for now ... */
2175                 err |= __put_user(from->si_pid, &to->si_pid);
2176                 err |= __put_user(from->si_uid, &to->si_uid);
2177                 break;
2178         }
2179         return err;
2180 }
2181
2182 #endif
2183
2184 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2185                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2186                 size_t, sigsetsize)
2187 {
2188         int ret, sig;
2189         sigset_t these;
2190         struct timespec ts;
2191         siginfo_t info;
2192         long timeout = 0;
2193
2194         /* XXX: Don't preclude handling different sized sigset_t's.  */
2195         if (sigsetsize != sizeof(sigset_t))
2196                 return -EINVAL;
2197
2198         if (copy_from_user(&these, uthese, sizeof(these)))
2199                 return -EFAULT;
2200                 
2201         /*
2202          * Invert the set of allowed signals to get those we
2203          * want to block.
2204          */
2205         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2206         signotset(&these);
2207
2208         if (uts) {
2209                 if (copy_from_user(&ts, uts, sizeof(ts)))
2210                         return -EFAULT;
2211                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2212                     || ts.tv_sec < 0)
2213                         return -EINVAL;
2214         }
2215
2216         spin_lock_irq(&current->sighand->siglock);
2217         sig = dequeue_signal(current, &these, &info);
2218         if (!sig) {
2219                 timeout = MAX_SCHEDULE_TIMEOUT;
2220                 if (uts)
2221                         timeout = (timespec_to_jiffies(&ts)
2222                                    + (ts.tv_sec || ts.tv_nsec));
2223
2224                 if (timeout) {
2225                         /* None ready -- temporarily unblock those we're
2226                          * interested in while we are sleeping, so that we'll
2227                          * be awakened when they arrive.  */
2228                         current->real_blocked = current->blocked;
2229                         sigandsets(&current->blocked, &current->blocked, &these);
2230                         recalc_sigpending();
2231                         spin_unlock_irq(&current->sighand->siglock);
2232
2233                         timeout = schedule_timeout_interruptible(timeout);
2234
2235                         spin_lock_irq(&current->sighand->siglock);
2236                         sig = dequeue_signal(current, &these, &info);
2237                         current->blocked = current->real_blocked;
2238                         siginitset(&current->real_blocked, 0);
2239                         recalc_sigpending();
2240                 }
2241         }
2242         spin_unlock_irq(&current->sighand->siglock);
2243
2244         if (sig) {
2245                 ret = sig;
2246                 if (uinfo) {
2247                         if (copy_siginfo_to_user(uinfo, &info))
2248                                 ret = -EFAULT;
2249                 }
2250         } else {
2251                 ret = -EAGAIN;
2252                 if (timeout)
2253                         ret = -EINTR;
2254         }
2255
2256         return ret;
2257 }
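/*
 * Illustrative sketch (editor's addition): the usual user-space pattern for
 * this syscall (via the libc sigtimedwait() wrapper) is to block the signal
 * first and then wait for it synchronously; hypothetical snippet:
 *
 *	sigset_t set;
 *	siginfo_t si;
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	sigemptyset(&set);
 *	sigaddset(&set, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &set, NULL);
 *	if (sigtimedwait(&set, &si, &ts) == -1 && errno == EAGAIN)
 *		puts("timed out");         // no signal arrived in time
 */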
2258
2259 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2260 {
2261         struct siginfo info;
2262
2263         info.si_signo = sig;
2264         info.si_errno = 0;
2265         info.si_code = SI_USER;
2266         info.si_pid = task_tgid_vnr(current);
2267         info.si_uid = current_uid();
2268
2269         return kill_something_info(sig, &info, pid);
2270 }
2271
2272 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2273 {
2274         int error;
2275         struct siginfo info;
2276         struct task_struct *p;
2277         unsigned long flags;
2278
2279         error = -ESRCH;
2280         info.si_signo = sig;
2281         info.si_errno = 0;
2282         info.si_code = SI_TKILL;
2283         info.si_pid = task_tgid_vnr(current);
2284         info.si_uid = current_uid();
2285
2286         rcu_read_lock();
2287         p = find_task_by_vpid(pid);
2288         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2289                 error = check_kill_permission(sig, &info, p);
2290                 /*
2291                  * The null signal is a permissions and process existence
2292                  * probe.  No signal is actually delivered.
2293                  *
2294                  * If lock_task_sighand() fails we pretend the task dies
2295                  * after receiving the signal. The window is tiny, and the
2296                  * signal is private anyway.
2297                  */
2298                 if (!error && sig && lock_task_sighand(p, &flags)) {
2299                         error = specific_send_sig_info(sig, &info, p);
2300                         unlock_task_sighand(p, &flags);
2301                 }
2302         }
2303         rcu_read_unlock();
2304
2305         return error;
2306 }
2307
2308 /**
2309  *  sys_tgkill - send signal to one specific thread
2310  *  @tgid: the thread group ID of the thread
2311  *  @pid: the PID of the thread
2312  *  @sig: signal to be sent
2313  *
2314  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2315  *  exists but no longer belongs to the target process. This
2316  *  method solves the problem of threads exiting and PIDs getting reused.
2317  */
2318 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2319 {
2320         /* This is only valid for single tasks */
2321         if (pid <= 0 || tgid <= 0)
2322                 return -EINVAL;
2323
2324         return do_tkill(tgid, pid, sig);
2325 }
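/*
 * Illustrative sketch (editor's addition): there is no dedicated wrapper for
 * tgkill in older libcs, so user space typically goes through syscall();
 * thread libraries use it to signal exactly one thread of a process.  The
 * tgid/tid values below are assumed to identify an existing thread:
 *
 *	syscall(SYS_tgkill, tgid, tid, SIGUSR1);
 */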
2326
2327 /*
2328  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2329  */
2330 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2331 {
2332         /* This is only valid for single tasks */
2333         if (pid <= 0)
2334                 return -EINVAL;
2335
2336         return do_tkill(0, pid, sig);
2337 }
2338
2339 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2340                 siginfo_t __user *, uinfo)
2341 {
2342         siginfo_t info;
2343
2344         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2345                 return -EFAULT;
2346
2347         /* Not even root can pretend to send signals from the kernel.
2348            Nor can they impersonate a kill(), which adds source info.  */
2349         if (info.si_code >= 0)
2350                 return -EPERM;
2351         info.si_signo = sig;
2352
2353         /* POSIX.1b doesn't mention process groups.  */
2354         return kill_proc_info(sig, &info, pid);
2355 }
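/*
 * Illustrative sketch (editor's addition): user space normally reaches this
 * syscall through the libc sigqueue() wrapper, which fills in a negative
 * si_code (SI_QUEUE) and so passes the check above.  pid is assumed to be
 * the target process:
 *
 *	union sigval v = { .sival_int = 42 };
 *
 *	sigqueue(pid, SIGUSR1, v);         // delivered with si_code == SI_QUEUE
 */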
2356
2357 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2358 {
2359         struct task_struct *t = current;
2360         struct k_sigaction *k;
2361         sigset_t mask;
2362
2363         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2364                 return -EINVAL;
2365
2366         k = &t->sighand->action[sig-1];
2367
2368         spin_lock_irq(&current->sighand->siglock);
2369         if (oact)
2370                 *oact = *k;
2371
2372         if (act) {
2373                 sigdelsetmask(&act->sa.sa_mask,
2374                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2375                 *k = *act;
2376                 /*
2377                  * POSIX 3.3.1.3:
2378                  *  "Setting a signal action to SIG_IGN for a signal that is
2379                  *   pending shall cause the pending signal to be discarded,
2380                  *   whether or not it is blocked."
2381                  *
2382                  *  "Setting a signal action to SIG_DFL for a signal that is
2383                  *   pending and whose default action is to ignore the signal
2384                  *   (for example, SIGCHLD), shall cause the pending signal to
2385                  *   be discarded, whether or not it is blocked"
2386                  */
2387                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2388                         sigemptyset(&mask);
2389                         sigaddset(&mask, sig);
2390                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2391                         do {
2392                                 rm_from_queue_full(&mask, &t->pending);
2393                                 t = next_thread(t);
2394                         } while (t != current);
2395                 }
2396         }
2397
2398         spin_unlock_irq(&current->sighand->siglock);
2399         return 0;
2400 }
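/*
 * Illustrative sketch (editor's addition): the POSIX rule quoted above can
 * be demonstrated from user space; a pending, blocked signal is simply
 * discarded once the action becomes SIG_IGN (hypothetical snippet, libc
 * calls only):
 *
 *	sigset_t set;
 *	struct sigaction sa = { .sa_handler = SIG_IGN };
 *
 *	sigemptyset(&set);
 *	sigaddset(&set, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &set, NULL);
 *	raise(SIGUSR1);                    // SIGUSR1 is now pending and blocked
 *	sigaction(SIGUSR1, &sa, NULL);     // pending SIGUSR1 is discarded
 *	sigprocmask(SIG_UNBLOCK, &set, NULL);  // nothing is delivered
 */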
2401
2402 int 
2403 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2404 {
2405         stack_t oss;
2406         int error;
2407
2408         if (uoss) {
2409                 oss.ss_sp = (void __user *) current->sas_ss_sp;
2410                 oss.ss_size = current->sas_ss_size;
2411                 oss.ss_flags = sas_ss_flags(sp);
2412         }
2413
2414         if (uss) {
2415                 void __user *ss_sp;
2416                 size_t ss_size;
2417                 int ss_flags;
2418
2419                 error = -EFAULT;
2420                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
2421                     || __get_user(ss_sp, &uss->ss_sp)
2422                     || __get_user(ss_flags, &uss->ss_flags)
2423                     || __get_user(ss_size, &uss->ss_size))
2424                         goto out;
2425
2426                 error = -EPERM;
2427                 if (on_sig_stack(sp))
2428                         goto out;
2429
2430                 error = -EINVAL;
2431                 /*
2432                  *
2433                  * Note - this code used to test ss_flags incorrectly;
2434                  *        old code may have been written using ss_flags==0
2435                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2436                  *        way that worked), so this fix preserves that older
2437                  *        mechanism.
2438                  */
2439                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2440                         goto out;
2441
2442                 if (ss_flags == SS_DISABLE) {
2443                         ss_size = 0;
2444                         ss_sp = NULL;
2445                 } else {
2446                         error = -ENOMEM;
2447                         if (ss_size < MINSIGSTKSZ)
2448                                 goto out;
2449                 }
2450
2451                 current->sas_ss_sp = (unsigned long) ss_sp;
2452                 current->sas_ss_size = ss_size;
2453         }
2454
2455         if (uoss) {
2456                 error = -EFAULT;
2457                 if (copy_to_user(uoss, &oss, sizeof(oss)))
2458                         goto out;
2459         }
2460
2461         error = 0;
2462 out:
2463         return error;
2464 }
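/*
 * Illustrative sketch (editor's addition): user space pairs this syscall
 * with SA_ONSTACK so that e.g. a SIGSEGV caused by stack overflow can still
 * run its handler on a separate stack; handle_segv() is a hypothetical
 * handler function:
 *
 *	stack_t ss = {
 *		.ss_sp    = malloc(SIGSTKSZ),
 *		.ss_size  = SIGSTKSZ,
 *		.ss_flags = 0,
 *	};
 *	struct sigaction sa = {
 *		.sa_handler = handle_segv,
 *		.sa_flags   = SA_ONSTACK,
 *	};
 *
 *	sigaltstack(&ss, NULL);
 *	sigaction(SIGSEGV, &sa, NULL);
 */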
2465
2466 #ifdef __ARCH_WANT_SYS_SIGPENDING
2467
2468 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2469 {
2470         return do_sigpending(set, sizeof(*set));
2471 }
2472
2473 #endif
2474
2475 #ifdef __ARCH_WANT_SYS_SIGPROCMASK
2476 /* Some platforms have their own version with special arguments; others
2477    support only sys_rt_sigprocmask.  */
2478
2479 SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2480                 old_sigset_t __user *, oset)
2481 {
2482         int error;
2483         old_sigset_t old_set, new_set;
2484
2485         if (set) {
2486                 error = -EFAULT;
2487                 if (copy_from_user(&new_set, set, sizeof(*set)))
2488                         goto out;
2489                 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2490
2491                 spin_lock_irq(&current->sighand->siglock);
2492                 old_set = current->blocked.sig[0];
2493
2494                 error = 0;
2495                 switch (how) {
2496                 default:
2497                         error = -EINVAL;
2498                         break;
2499                 case SIG_BLOCK:
2500                         sigaddsetmask(&current->blocked, new_set);
2501                         break;
2502                 case SIG_UNBLOCK:
2503                         sigdelsetmask(&current->blocked, new_set);
2504                         break;
2505                 case SIG_SETMASK:
2506                         current->blocked.sig[0] = new_set;
2507                         break;
2508                 }
2509
2510                 recalc_sigpending();
2511                 spin_unlock_irq(&current->sighand->siglock);
2512                 if (error)
2513                         goto out;
2514                 if (oset)
2515                         goto set_old;
2516         } else if (oset) {
2517                 old_set = current->blocked.sig[0];
2518         set_old:
2519                 error = -EFAULT;
2520                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2521                         goto out;
2522         }
2523         error = 0;
2524 out:
2525         return error;
2526 }
2527 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2528
2529 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2530 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2531                 const struct sigaction __user *, act,
2532                 struct sigaction __user *, oact,
2533                 size_t, sigsetsize)
2534 {
2535         struct k_sigaction new_sa, old_sa;
2536         int ret = -EINVAL;
2537
2538         /* XXX: Don't preclude handling different sized sigset_t's.  */
2539         if (sigsetsize != sizeof(sigset_t))
2540                 goto out;
2541
2542         if (act) {
2543                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2544                         return -EFAULT;
2545         }
2546
2547         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2548
2549         if (!ret && oact) {
2550                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2551                         return -EFAULT;
2552         }
2553 out:
2554         return ret;
2555 }
2556 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2557
2558 #ifdef __ARCH_WANT_SYS_SGETMASK
2559
2560 /*
2561  * For backwards compatibility.  Functionality superseded by sigprocmask.
2562  */
2563 SYSCALL_DEFINE0(sgetmask)
2564 {
2565         /* SMP safe */
2566         return current->blocked.sig[0];
2567 }
2568
2569 SYSCALL_DEFINE1(ssetmask, int, newmask)
2570 {
2571         int old;
2572
2573         spin_lock_irq(&current->sighand->siglock);
2574         old = current->blocked.sig[0];
2575
2576         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2577                                                   sigmask(SIGSTOP)));
2578         recalc_sigpending();
2579         spin_unlock_irq(&current->sighand->siglock);
2580
2581         return old;
2582 }
2583 #endif /* __ARCH_WANT_SYS_SGETMASK */
2584
2585 #ifdef __ARCH_WANT_SYS_SIGNAL
2586 /*
2587  * For backwards compatibility.  Functionality superseded by sigaction.
2588  */
2589 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2590 {
2591         struct k_sigaction new_sa, old_sa;
2592         int ret;
2593
2594         new_sa.sa.sa_handler = handler;
2595         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2596         sigemptyset(&new_sa.sa.sa_mask);
2597
2598         ret = do_sigaction(sig, &new_sa, &old_sa);
2599
2600         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2601 }
2602 #endif /* __ARCH_WANT_SYS_SIGNAL */
2603
2604 #ifdef __ARCH_WANT_SYS_PAUSE
2605
2606 SYSCALL_DEFINE0(pause)
2607 {
2608         current->state = TASK_INTERRUPTIBLE;
2609         schedule();
2610         return -ERESTARTNOHAND;
2611 }
2612
2613 #endif
2614
2615 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2616 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2617 {
2618         sigset_t newset;
2619
2620         /* XXX: Don't preclude handling different sized sigset_t's.  */
2621         if (sigsetsize != sizeof(sigset_t))
2622                 return -EINVAL;
2623
2624         if (copy_from_user(&newset, unewset, sizeof(newset)))
2625                 return -EFAULT;
2626         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2627
2628         spin_lock_irq(&current->sighand->siglock);
2629         current->saved_sigmask = current->blocked;
2630         current->blocked = newset;
2631         recalc_sigpending();
2632         spin_unlock_irq(&current->sighand->siglock);
2633
2634         current->state = TASK_INTERRUPTIBLE;
2635         schedule();
2636         set_restore_sigmask();
2637         return -ERESTARTNOHAND;
2638 }
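/*
 * Illustrative sketch (editor's addition): rt_sigsuspend is the backend of
 * the classic race-free "block, test, then wait" idiom; child_exited is a
 * hypothetical volatile sig_atomic_t flag set by a SIGCHLD handler:
 *
 *	sigset_t block, old;
 *
 *	sigemptyset(&block);
 *	sigaddset(&block, SIGCHLD);
 *	sigprocmask(SIG_BLOCK, &block, &old);
 *	while (!child_exited)
 *		sigsuspend(&old);          // atomically unblock and sleep
 *	sigprocmask(SIG_SETMASK, &old, NULL);
 */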
2639 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
2640
2641 __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
2642 {
2643         return NULL;
2644 }
2645
2646 void __init signals_init(void)
2647 {
2648         sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2649 }