X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fposix-cpu-timers.c;h=bc7704b3a4431434e15bacb3127cfb9a5e1517a9;hb=a3a2e76c77fa22b114e421ac11dec0c56c3503fb;hp=d30b304a33849fd79bee7c66808d76297ce3b0be;hpb=e80eda94d3eaf1d12cfc97878eff77cd679dabc9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d30b304..bc7704b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -4,10 +4,29 @@ #include #include -#include #include +#include +#include +#include +#include -static int check_clock(clockid_t which_clock) +/* + * Called after updating RLIMIT_CPU to set timer expiration if necessary. + */ +void update_rlimit_cpu(unsigned long rlim_new) +{ + cputime_t cputime = secs_to_cputime(rlim_new); + struct signal_struct *const sig = current->signal; + + if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || + cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { + spin_lock_irq(¤t->sighand->siglock); + set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(¤t->sighand->siglock); + } +} + +static int check_clock(const clockid_t which_clock) { int error = 0; struct task_struct *p; @@ -20,9 +39,9 @@ static int check_clock(clockid_t which_clock) return 0; read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - if (!p || (CPUCLOCK_PERTHREAD(which_clock) ? - p->tgid != current->tgid : p->tgid != pid)) { + p = find_task_by_vpid(pid); + if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? + same_thread_group(p, current) : thread_group_leader(p))) { error = -EINVAL; } read_unlock(&tasklist_lock); @@ -31,31 +50,29 @@ static int check_clock(clockid_t which_clock) } static inline union cpu_time_count -timespec_to_sample(clockid_t which_clock, const struct timespec *tp) +timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { union cpu_time_count ret; ret.sched = 0; /* high half always zero when .cpu used */ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - ret.sched = tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; + ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; } else { ret.cpu = timespec_to_cputime(tp); } return ret; } -static void sample_to_timespec(clockid_t which_clock, +static void sample_to_timespec(const clockid_t which_clock, union cpu_time_count cpu, struct timespec *tp) { - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - tp->tv_sec = div_long_long_rem(cpu.sched, - NSEC_PER_SEC, &tp->tv_nsec); - } else { + if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) + *tp = ns_to_timespec(cpu.sched); + else cputime_to_timespec(cpu.cpu, tp); - } } -static inline int cpu_time_before(clockid_t which_clock, +static inline int cpu_time_before(const clockid_t which_clock, union cpu_time_count now, union cpu_time_count then) { @@ -65,7 +82,7 @@ static inline int cpu_time_before(clockid_t which_clock, return cputime_lt(now.cpu, then.cpu); } } -static inline void cpu_time_add(clockid_t which_clock, +static inline void cpu_time_add(const clockid_t which_clock, union cpu_time_count *acc, union cpu_time_count val) { @@ -75,7 +92,7 @@ static inline void cpu_time_add(clockid_t which_clock, acc->cpu = cputime_add(acc->cpu, val.cpu); } } -static inline union cpu_time_count cpu_time_sub(clockid_t which_clock, +static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, union cpu_time_count a, union cpu_time_count b) { @@ -88,10 +105,23 @@ static inline union cpu_time_count cpu_time_sub(clockid_t which_clock, } /* + * Divide and limit the result to res >= 1 + * + * This is necessary to prevent signal delivery starvation, when the result of + * the division would be rounded down to 0. + */ +static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div) +{ + cputime_t res = cputime_div(time, div); + + return max_t(cputime_t, res, 1); +} + +/* * Update expiry time from increment, and increase overrun count, * given the current clock sample. */ -static inline void bump_cpu_timer(struct k_itimer *timer, +static void bump_cpu_timer(struct k_itimer *timer, union cpu_time_count now) { int i; @@ -110,7 +140,7 @@ static inline void bump_cpu_timer(struct k_itimer *timer, for (i = 0; incr < delta - incr; i++) incr = incr << 1; for (; i >= 0; incr >>= 1, i--) { - if (delta <= incr) + if (delta < incr) continue; timer->it.cpu.expires.sched += incr; timer->it_overrun += 1 << i; @@ -128,7 +158,7 @@ static inline void bump_cpu_timer(struct k_itimer *timer, for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++) incr = cputime_add(incr, incr); for (; i >= 0; incr = cputime_halve(incr), i--) { - if (cputime_le(delta, incr)) + if (cputime_lt(delta, incr)) continue; timer->it.cpu.expires.cpu = cputime_add(timer->it.cpu.expires.cpu, incr); @@ -146,12 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p) { return p->utime; } -static inline unsigned long long sched_ns(struct task_struct *p) -{ - return (p == current) ? current_sched_time(p) : p->sched_time; -} -int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { int error = check_clock(which_clock); if (!error) { @@ -169,7 +195,7 @@ int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) return error; } -int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) +int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -186,7 +212,7 @@ int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) /* * Sample a per-thread clock for the given task. */ -static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p, +static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu) { switch (CPUCLOCK_WHICH(which_clock)) { @@ -199,81 +225,107 @@ static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = sched_ns(p); + cpu->sched = task_sched_runtime(p); break; } return 0; } +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) +{ + struct sighand_struct *sighand; + struct signal_struct *sig; + struct task_struct *t; + + *times = INIT_CPUTIME; + + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); + if (!sighand) + goto out; + + sig = tsk->signal; + + t = tsk; + do { + times->utime = cputime_add(times->utime, t->utime); + times->stime = cputime_add(times->stime, t->stime); + times->sum_exec_runtime += t->se.sum_exec_runtime; + + t = next_thread(t); + } while (t != tsk); + + times->utime = cputime_add(times->utime, sig->utime); + times->stime = cputime_add(times->stime, sig->stime); + times->sum_exec_runtime += sig->sum_sched_runtime; +out: + rcu_read_unlock(); +} + +static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b) +{ + if (cputime_gt(b->utime, a->utime)) + a->utime = b->utime; + + if (cputime_gt(b->stime, a->stime)) + a->stime = b->stime; + + if (b->sum_exec_runtime > a->sum_exec_runtime) + a->sum_exec_runtime = b->sum_exec_runtime; +} + +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct task_cputime sum; + unsigned long flags; + + spin_lock_irqsave(&cputimer->lock, flags); + if (!cputimer->running) { + cputimer->running = 1; + /* + * The POSIX timer interface allows for absolute time expiry + * values through the TIMER_ABSTIME flag, therefore we have + * to synchronize the timer to the clock every time we start + * it. + */ + thread_group_cputime(tsk, &sum); + update_gt_cputime(&cputimer->cputime, &sum); + } + *times = cputimer->cputime; + spin_unlock_irqrestore(&cputimer->lock, flags); +} + /* * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. - * Must be called with tasklist_lock held for reading, and p->sighand->siglock. */ -static int cpu_clock_sample_group_locked(unsigned int clock_idx, - struct task_struct *p, - union cpu_time_count *cpu) +static int cpu_clock_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) { - struct task_struct *t = p; - switch (clock_idx) { + struct task_cputime cputime; + + switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); - do { - cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); - t = next_thread(t); - } while (t != p); + thread_group_cputime(p, &cputime); + cpu->cpu = cputime_add(cputime.utime, cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = p->signal->utime; - do { - cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); - t = next_thread(t); - } while (t != p); + thread_group_cputime(p, &cputime); + cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = p->signal->sched_time; - /* Add in each other live thread. */ - while ((t = next_thread(t)) != p) { - cpu->sched += t->sched_time; - } - if (p->tgid == current->tgid) { - /* - * We're sampling ourselves, so include the - * cycles not yet banked. We still omit - * other threads running on other CPUs, - * so the total can always be behind as - * much as max(nthreads-1,ncpus) * (NSEC_PER_SEC/HZ). - */ - cpu->sched += current_sched_time(current); - } else { - cpu->sched += p->sched_time; - } + cpu->sched = thread_group_sched_runtime(p); break; } return 0; } -/* - * Sample a process (thread group) clock for the given group_leader task. - * Must be called with tasklist_lock held for reading. - */ -static int cpu_clock_sample_group(clockid_t which_clock, - struct task_struct *p, - union cpu_time_count *cpu) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p, - cpu); - spin_unlock_irqrestore(&p->sighand->siglock, flags); - return ret; -} - -int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; @@ -302,20 +354,25 @@ int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp) * should be able to see it. */ struct task_struct *p; - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); + rcu_read_lock(); + p = find_task_by_vpid(pid); if (p) { if (CPUCLOCK_PERTHREAD(which_clock)) { - if (p->tgid == current->tgid) { + if (same_thread_group(p, current)) { error = cpu_clock_sample(which_clock, p, &rtn); } - } else if (p->tgid == pid && p->signal) { - error = cpu_clock_sample_group(which_clock, - p, &rtn); + } else { + read_lock(&tasklist_lock); + if (thread_group_leader(p) && p->signal) { + error = + cpu_clock_sample_group(which_clock, + p, &rtn); + } + read_unlock(&tasklist_lock); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); } if (error) @@ -327,7 +384,8 @@ int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp) /* * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. - * This is called from sys_timer_create with the new timer already locked. + * This is called from sys_timer_create() and do_cpu_nanosleep() with the + * new timer already all-zeros initialized. */ int posix_cpu_timer_create(struct k_itimer *new_timer) { @@ -339,24 +397,22 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) return -EINVAL; INIT_LIST_HEAD(&new_timer->it.cpu.entry); - new_timer->it.cpu.incr.sched = 0; - new_timer->it.cpu.expires.sched = 0; read_lock(&tasklist_lock); if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { if (pid == 0) { p = current; } else { - p = find_task_by_pid(pid); - if (p && p->tgid != current->tgid) + p = find_task_by_vpid(pid); + if (p && !same_thread_group(p, current)) p = NULL; } } else { if (pid == 0) { p = current->group_leader; } else { - p = find_task_by_pid(pid); - if (p && p->tgid != pid) + p = find_task_by_vpid(pid); + if (p && !thread_group_leader(p)) p = NULL; } } @@ -380,14 +436,9 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) int posix_cpu_timer_del(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; + int ret = 0; - if (timer->it.cpu.firing) - return TIMER_RETRY; - - if (unlikely(p == NULL)) - return 0; - - if (!list_empty(&timer->it.cpu.entry)) { + if (likely(p != NULL)) { read_lock(&tasklist_lock); if (unlikely(p->signal == NULL)) { /* @@ -396,18 +447,20 @@ int posix_cpu_timer_del(struct k_itimer *timer) */ BUG_ON(!list_empty(&timer->it.cpu.entry)); } else { - /* - * Take us off the task's timer list. - */ spin_lock(&p->sighand->siglock); - list_del(&timer->it.cpu.entry); + if (timer->it.cpu.firing) + ret = TIMER_RETRY; + else + list_del(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); } read_unlock(&tasklist_lock); + + if (!ret) + put_task_struct(p); } - put_task_struct(p); - return 0; + return ret; } /* @@ -418,14 +471,12 @@ int posix_cpu_timer_del(struct k_itimer *timer) */ static void cleanup_timers(struct list_head *head, cputime_t utime, cputime_t stime, - unsigned long long sched_time) + unsigned long long sum_exec_runtime) { struct cpu_timer_list *timer, *next; cputime_t ptime = cputime_add(utime, stime); list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, ptime)) { timer->expires.cpu = cputime_zero; @@ -437,8 +488,6 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, utime)) { timer->expires.cpu = cputime_zero; @@ -450,13 +499,11 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { - put_task_struct(timer->task); - timer->task = NULL; list_del_init(&timer->entry); - if (timer->expires.sched < sched_time) { + if (timer->expires.sched < sum_exec_runtime) { timer->expires.sched = 0; } else { - timer->expires.sched -= sched_time; + timer->expires.sched -= sum_exec_runtime; } } } @@ -469,81 +516,17 @@ static void cleanup_timers(struct list_head *head, void posix_cpu_timers_exit(struct task_struct *tsk) { cleanup_timers(tsk->cpu_timers, - tsk->utime, tsk->stime, tsk->sched_time); + tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - cleanup_timers(tsk->signal->cpu_timers, - cputime_add(tsk->utime, tsk->signal->utime), - cputime_add(tsk->stime, tsk->signal->stime), - tsk->sched_time + tsk->signal->sched_time); -} - - -/* - * Set the expiry times of all the threads in the process so one of them - * will go off before the process cumulative expiry total is reached. - */ -static void process_timer_rebalance(struct task_struct *p, - unsigned int clock_idx, - union cpu_time_count expires, - union cpu_time_count val) -{ - cputime_t ticks, left; - unsigned long long ns, nsleft; - struct task_struct *t = p; - unsigned int nthreads = atomic_read(&p->signal->live); + struct signal_struct *const sig = tsk->signal; - switch (clock_idx) { - default: - BUG(); - break; - case CPUCLOCK_PROF: - left = cputime_div(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (!unlikely(t->exit_state)) { - ticks = cputime_add(prof_ticks(t), left); - if (cputime_eq(t->it_prof_expires, - cputime_zero) || - cputime_gt(t->it_prof_expires, ticks)) { - t->it_prof_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_VIRT: - left = cputime_div(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (!unlikely(t->exit_state)) { - ticks = cputime_add(virt_ticks(t), left); - if (cputime_eq(t->it_virt_expires, - cputime_zero) || - cputime_gt(t->it_virt_expires, ticks)) { - t->it_virt_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_SCHED: - nsleft = expires.sched - val.sched; - do_div(nsleft, nthreads); - do { - if (!unlikely(t->exit_state)) { - ns = t->sched_time + nsleft; - if (t->it_sched_expires == 0 || - t->it_sched_expires > ns) { - t->it_sched_expires = ns; - } - } - t = next_thread(t); - } while (t != p); - break; - } + cleanup_timers(tsk->signal->cpu_timers, + cputime_add(tsk->utime, sig->utime), + cputime_add(tsk->stime, sig->stime), + tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) @@ -559,6 +542,17 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) now); } +static inline int expires_gt(cputime_t expires, cputime_t new_exp) +{ + return cputime_eq(expires, cputime_zero) || + cputime_gt(expires, new_exp); +} + +static inline int expires_le(cputime_t expires, cputime_t new_exp) +{ + return !cputime_eq(expires, cputime_zero) && + cputime_le(expires, new_exp); +} /* * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the tasklist_lock held @@ -582,17 +576,15 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) listpos = head; if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { list_for_each_entry(next, head, entry) { - if (next->expires.sched > nt->expires.sched) { - listpos = &next->entry; + if (next->expires.sched > nt->expires.sched) break; - } + listpos = &next->entry; } } else { list_for_each_entry(next, head, entry) { - if (cputime_gt(next->expires.cpu, nt->expires.cpu)) { - listpos = &next->entry; + if (cputime_gt(next->expires.cpu, nt->expires.cpu)) break; - } + listpos = &next->entry; } } list_add(&nt->entry, listpos); @@ -605,61 +597,56 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) { + union cpu_time_count *exp = &nt->expires; + switch (CPUCLOCK_WHICH(timer->it_clock)) { default: BUG(); case CPUCLOCK_PROF: - if (cputime_eq(p->it_prof_expires, - cputime_zero) || - cputime_gt(p->it_prof_expires, - nt->expires.cpu)) - p->it_prof_expires = nt->expires.cpu; + if (expires_gt(p->cputime_expires.prof_exp, + exp->cpu)) + p->cputime_expires.prof_exp = exp->cpu; break; case CPUCLOCK_VIRT: - if (cputime_eq(p->it_virt_expires, - cputime_zero) || - cputime_gt(p->it_virt_expires, - nt->expires.cpu)) - p->it_virt_expires = nt->expires.cpu; + if (expires_gt(p->cputime_expires.virt_exp, + exp->cpu)) + p->cputime_expires.virt_exp = exp->cpu; break; case CPUCLOCK_SCHED: - if (p->it_sched_expires == 0 || - p->it_sched_expires > nt->expires.sched) - p->it_sched_expires = nt->expires.sched; + if (p->cputime_expires.sched_exp == 0 || + p->cputime_expires.sched_exp > exp->sched) + p->cputime_expires.sched_exp = + exp->sched; break; } } else { + struct signal_struct *const sig = p->signal; + union cpu_time_count *exp = &timer->it.cpu.expires; + /* - * For a process timer, we must balance - * all the live threads' expirations. + * For a process timer, set the cached expiration time. */ switch (CPUCLOCK_WHICH(timer->it_clock)) { default: BUG(); case CPUCLOCK_VIRT: - if (!cputime_eq(p->signal->it_virt_expires, - cputime_zero) && - cputime_lt(p->signal->it_virt_expires, - timer->it.cpu.expires.cpu)) + if (expires_le(sig->it[CPUCLOCK_VIRT].expires, + exp->cpu)) break; - goto rebalance; + sig->cputime_expires.virt_exp = exp->cpu; + break; case CPUCLOCK_PROF: - if (!cputime_eq(p->signal->it_prof_expires, - cputime_zero) && - cputime_lt(p->signal->it_prof_expires, - timer->it.cpu.expires.cpu)) + if (expires_le(sig->it[CPUCLOCK_PROF].expires, + exp->cpu)) break; - i = p->signal->rlim[RLIMIT_CPU].rlim_cur; + i = sig->rlim[RLIMIT_CPU].rlim_cur; if (i != RLIM_INFINITY && - i <= cputime_to_secs(timer->it.cpu.expires.cpu)) + i <= cputime_to_secs(exp->cpu)) break; - goto rebalance; + sig->cputime_expires.prof_exp = exp->cpu; + break; case CPUCLOCK_SCHED: - rebalance: - process_timer_rebalance( - timer->it.cpu.task, - CPUCLOCK_WHICH(timer->it_clock), - timer->it.cpu.expires, now); + sig->cputime_expires.sched_exp = exp->sched; break; } } @@ -698,6 +685,33 @@ static void cpu_timer_fire(struct k_itimer *timer) } /* + * Sample a process (thread group) timer for the given group_leader task. + * Must be called with tasklist_lock held for reading. + */ +static int cpu_timer_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) +{ + struct task_cputime cputime; + + thread_group_cputimer(p, &cputime); + switch (CPUCLOCK_WHICH(which_clock)) { + default: + return -EINVAL; + case CPUCLOCK_PROF: + cpu->cpu = cputime_add(cputime.utime, cputime.stime); + break; + case CPUCLOCK_VIRT: + cpu->cpu = cputime.utime; + break; + case CPUCLOCK_SCHED: + cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + break; + } + return 0; +} + +/* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. * If we return TIMER_RETRY, it's necessary to release the timer's lock @@ -736,9 +750,15 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * Disarm any old timer after extracting its expiry time. */ BUG_ON(!irqs_disabled()); + + ret = 0; spin_lock(&p->sighand->siglock); old_expires = timer->it.cpu.expires; - list_del_init(&timer->it.cpu.entry); + if (unlikely(timer->it.cpu.firing)) { + timer->it.cpu.firing = -1; + ret = TIMER_RETRY; + } else + list_del_init(&timer->it.cpu.entry); spin_unlock(&p->sighand->siglock); /* @@ -752,7 +772,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &val); } else { - cpu_clock_sample_group(timer->it_clock, p, &val); + cpu_timer_sample_group(timer->it_clock, p, &val); } if (old) { @@ -786,7 +806,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, } } - if (unlikely(timer->it.cpu.firing)) { + if (unlikely(ret)) { /* * We are colliding with the timer actually firing. * Punt after filling in the timer's old value, and @@ -794,8 +814,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, * it as an overrun (thanks to bump_cpu_timer above). */ read_unlock(&tasklist_lock); - timer->it.cpu.firing = -1; - ret = TIMER_RETRY; goto out; } @@ -902,7 +920,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) read_unlock(&tasklist_lock); goto dead; } else { - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead = (unlikely(p->exit_state) && thread_group_empty(p)); } @@ -963,15 +981,17 @@ static void check_thread_timers(struct task_struct *tsk, { int maxfire; struct list_head *timers = tsk->cpu_timers; + struct signal_struct *const sig = tsk->signal; + unsigned long soft; maxfire = 20; - tsk->it_prof_expires = cputime_zero; + tsk->cputime_expires.prof_exp = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { - tsk->it_prof_expires = t->expires.cpu; + tsk->cputime_expires.prof_exp = t->expires.cpu; break; } t->firing = 1; @@ -980,13 +1000,13 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_virt_expires = cputime_zero; + tsk->cputime_expires.virt_exp = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { - tsk->it_virt_expires = t->expires.cpu; + tsk->cputime_expires.virt_exp = t->expires.cpu; break; } t->firing = 1; @@ -995,18 +1015,101 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_sched_expires = 0; + tsk->cputime_expires.sched_exp = 0; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || tsk->sched_time < t->expires.sched) { - tsk->it_sched_expires = t->expires.sched; + if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { + tsk->cputime_expires.sched_exp = t->expires.sched; break; } t->firing = 1; list_move_tail(&t->entry, firing); } + + /* + * Check for the special case thread timers. + */ + soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); + if (soft != RLIM_INFINITY) { + unsigned long hard = + ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); + + if (hard != RLIM_INFINITY && + tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { + /* + * At the hard limit, we just die. + * No need to calculate anything else now. + */ + __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + return; + } + if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { + /* + * At the soft limit, send a SIGXCPU every second. + */ + if (soft < hard) { + soft += USEC_PER_SEC; + sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; + } + printk(KERN_INFO + "RT Watchdog Timeout: %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + } + } +} + +static void stop_process_timers(struct signal_struct *sig) +{ + struct thread_group_cputimer *cputimer = &sig->cputimer; + unsigned long flags; + + if (!cputimer->running) + return; + + spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 0; + spin_unlock_irqrestore(&cputimer->lock, flags); + + sig->cputime_expires.prof_exp = cputime_zero; + sig->cputime_expires.virt_exp = cputime_zero; + sig->cputime_expires.sched_exp = 0; +} + +static u32 onecputick; + +static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, + cputime_t *expires, cputime_t cur_time, int signo) +{ + if (cputime_eq(it->expires, cputime_zero)) + return; + + if (cputime_ge(cur_time, it->expires)) { + if (!cputime_eq(it->incr, cputime_zero)) { + it->expires = cputime_add(it->expires, it->incr); + it->error += it->incr_error; + if (it->error >= onecputick) { + it->expires = cputime_sub(it->expires, + cputime_one_jiffy); + it->error -= onecputick; + } + } else { + it->expires = cputime_zero; + } + + trace_itimer_expire(signo == SIGPROF ? + ITIMER_PROF : ITIMER_VIRTUAL, + tsk->signal->leader_pid, cur_time); + __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + } + + if (!cputime_eq(it->expires, cputime_zero) && + (cputime_eq(*expires, cputime_zero) || + cputime_lt(it->expires, *expires))) { + *expires = it->expires; + } } /* @@ -1019,120 +1122,90 @@ static void check_process_timers(struct task_struct *tsk, { int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, stime, ptime, virt_expires, prof_expires; - unsigned long long sched_time, sched_expires; - struct task_struct *t; + cputime_t utime, ptime, virt_expires, prof_expires; + unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; + struct task_cputime cputime; + unsigned long soft; /* * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && - cputime_eq(sig->it_prof_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && - cputime_eq(sig->it_virt_expires, cputime_zero) && - list_empty(&timers[CPUCLOCK_SCHED])) + cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && + list_empty(&timers[CPUCLOCK_SCHED])) { + stop_process_timers(sig); return; + } /* * Collect the current process totals. */ - utime = sig->utime; - stime = sig->stime; - sched_time = sig->sched_time; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - sched_time += t->sched_time; - t = next_thread(t); - } while (t != tsk); - ptime = cputime_add(utime, stime); - + thread_group_cputimer(tsk, &cputime); + utime = cputime.utime; + ptime = cputime_add(utime, cputime.stime); + sum_sched_runtime = cputime.sum_exec_runtime; maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) { - prof_expires = t->expires.cpu; + if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) { + prof_expires = tl->expires.cpu; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } ++timers; maxfire = 20; virt_expires = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || cputime_lt(utime, t->expires.cpu)) { - virt_expires = t->expires.cpu; + if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) { + virt_expires = tl->expires.cpu; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } ++timers; maxfire = 20; sched_expires = 0; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_entry(timers->next, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || sched_time < t->expires.sched) { - sched_expires = t->expires.sched; + if (!--maxfire || sum_sched_runtime < tl->expires.sched) { + sched_expires = tl->expires.sched; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } /* * Check for the special case process timers. */ - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - if (cputime_ge(ptime, sig->it_prof_expires)) { - /* ITIMER_PROF fires and reloads. */ - sig->it_prof_expires = sig->it_prof_incr; - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - sig->it_prof_expires = cputime_add( - sig->it_prof_expires, ptime); - } - __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_prof_expires, cputime_zero) && - (cputime_eq(prof_expires, cputime_zero) || - cputime_lt(sig->it_prof_expires, prof_expires))) { - prof_expires = sig->it_prof_expires; - } - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - if (cputime_ge(utime, sig->it_virt_expires)) { - /* ITIMER_VIRTUAL fires and reloads. */ - sig->it_virt_expires = sig->it_virt_incr; - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - sig->it_virt_expires = cputime_add( - sig->it_virt_expires, utime); - } - __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero) && - (cputime_eq(virt_expires, cputime_zero) || - cputime_lt(sig->it_virt_expires, virt_expires))) { - virt_expires = sig->it_virt_expires; - } - } - if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, + SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, + SIGVTALRM); + soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); + if (soft != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); + unsigned long hard = + ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); cputime_t x; - if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) { + if (psecs >= hard) { /* * At the hard limit, we just die. * No need to calculate anything else now. @@ -1140,74 +1213,35 @@ static void check_process_timers(struct task_struct *tsk, __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } - if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) { + if (psecs >= soft) { /* * At the soft limit, send a SIGXCPU every second. */ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - if (sig->rlim[RLIMIT_CPU].rlim_cur - < sig->rlim[RLIMIT_CPU].rlim_max) { - sig->rlim[RLIMIT_CPU].rlim_cur++; + if (soft < hard) { + soft++; + sig->rlim[RLIMIT_CPU].rlim_cur = soft; } } - x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + x = secs_to_cputime(soft); if (cputime_eq(prof_expires, cputime_zero) || cputime_lt(x, prof_expires)) { prof_expires = x; } } - if (!cputime_eq(prof_expires, cputime_zero) || - !cputime_eq(virt_expires, cputime_zero) || - sched_expires != 0) { - /* - * Rebalance the threads' expiry times for the remaining - * process CPU timers. - */ - - cputime_t prof_left, virt_left, ticks; - unsigned long long sched_left, sched; - const unsigned int nthreads = atomic_read(&sig->live); - - prof_left = cputime_sub(prof_expires, utime); - prof_left = cputime_sub(prof_left, stime); - prof_left = cputime_div(prof_left, nthreads); - virt_left = cputime_sub(virt_expires, utime); - virt_left = cputime_div(virt_left, nthreads); - if (sched_expires) { - sched_left = sched_expires - sched_time; - do_div(sched_left, nthreads); - } else { - sched_left = 0; - } - t = tsk; - do { - ticks = cputime_add(cputime_add(t->utime, t->stime), - prof_left); - if (!cputime_eq(prof_expires, cputime_zero) && - (cputime_eq(t->it_prof_expires, cputime_zero) || - cputime_gt(t->it_prof_expires, ticks))) { - t->it_prof_expires = ticks; - } - - ticks = cputime_add(t->utime, virt_left); - if (!cputime_eq(virt_expires, cputime_zero) && - (cputime_eq(t->it_virt_expires, cputime_zero) || - cputime_gt(t->it_virt_expires, ticks))) { - t->it_virt_expires = ticks; - } - - sched = t->sched_time + sched_left; - if (sched_expires && (t->it_sched_expires == 0 || - t->it_sched_expires > sched)) { - t->it_sched_expires = sched; - } - - do { - t = next_thread(t); - } while (unlikely(t->exit_state)); - } while (t != tsk); - } + if (!cputime_eq(prof_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) + sig->cputime_expires.prof_exp = prof_expires; + if (!cputime_eq(virt_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) + sig->cputime_expires.virt_exp = virt_expires; + if (sched_expires != 0 && + (sig->cputime_expires.sched_exp == 0 || + sig->cputime_expires.sched_exp > sched_expires)) + sig->cputime_expires.sched_exp = sched_expires; } /* @@ -1223,7 +1257,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) /* * The task was cleaned up already, no future firings. */ - return; + goto out; /* * Fetch the current sample and update the timer's expiry time. @@ -1233,7 +1267,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) bump_cpu_timer(timer, now); if (unlikely(p->exit_state)) { clear_dead_task(timer, now); - return; + goto out; } read_lock(&tasklist_lock); /* arm_timer needs it. */ } else { @@ -1246,8 +1280,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) put_task_struct(p); timer->it.cpu.task = p = NULL; timer->it.cpu.expires.sched = 0; - read_unlock(&tasklist_lock); - return; + goto out_unlock; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { /* * We've noticed that the thread is dead, but @@ -1255,10 +1288,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) * drop our task ref. */ clear_dead_task(timer, now); - read_unlock(&tasklist_lock); - return; + goto out_unlock; } - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); bump_cpu_timer(timer, now); /* Leave the tasklist_lock locked for the call below. */ } @@ -1268,7 +1300,97 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ arm_timer(timer, now); +out_unlock: read_unlock(&tasklist_lock); + +out: + timer->it_overrun_last = timer->it_overrun; + timer->it_overrun = -1; + ++timer->it_requeue_pending; +} + +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. + */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (cputime_eq(cputime->utime, cputime_zero) && + cputime_eq(cputime->stime, cputime_zero) && + cputime->sum_exec_runtime == 0) + return 1; + return 0; +} + +/** + * task_cputime_expired - Compare two task_cputime entities. + * + * @sample: The task_cputime structure to be checked for expiration. + * @expires: Expiration times, against which @sample will be checked. + * + * Checks @sample against @expires to see if any field of @sample has expired. + * Returns true if any field of the former is greater than the corresponding + * field of the latter if the latter field is set. Otherwise returns false. + */ +static inline int task_cputime_expired(const struct task_cputime *sample, + const struct task_cputime *expires) +{ + if (!cputime_eq(expires->utime, cputime_zero) && + cputime_ge(sample->utime, expires->utime)) + return 1; + if (!cputime_eq(expires->stime, cputime_zero) && + cputime_ge(cputime_add(sample->utime, sample->stime), + expires->stime)) + return 1; + if (expires->sum_exec_runtime != 0 && + sample->sum_exec_runtime >= expires->sum_exec_runtime) + return 1; + return 0; +} + +/** + * fastpath_timer_check - POSIX CPU timers fast path. + * + * @tsk: The task (thread) being checked. + * + * Check the task and thread group timers. If both are zero (there are no + * timers set) return false. Otherwise snapshot the task and thread group + * timers and compare them with the corresponding expiration times. Return + * true if a timer has expired, else return false. + */ +static inline int fastpath_timer_check(struct task_struct *tsk) +{ + struct signal_struct *sig; + + /* tsk == current, ensure it is safe to use ->signal/sighand */ + if (unlikely(tsk->exit_state)) + return 0; + + if (!task_cputime_zero(&tsk->cputime_expires)) { + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, + .sum_exec_runtime = tsk->se.sum_exec_runtime + }; + + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) + return 1; + } + + sig = tsk->signal; + if (!task_cputime_zero(&sig->cputime_expires)) { + struct task_cputime group_sample; + + thread_group_cputimer(tsk, &group_sample); + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) + return 1; + } + + return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; } /* @@ -1283,28 +1405,18 @@ void run_posix_cpu_timers(struct task_struct *tsk) BUG_ON(!irqs_disabled()); -#define UNEXPIRED(clock) \ - (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ - cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) - - if (UNEXPIRED(prof) && UNEXPIRED(virt) && - (tsk->it_sched_expires == 0 || - tsk->sched_time < tsk->it_sched_expires)) - return; - -#undef UNEXPIRED - - BUG_ON(tsk->exit_state); - /* - * Double-check with locks held. + * The fast path checks that there are no expired thread or thread + * group timers. If that's so, just return. */ - read_lock(&tasklist_lock); - spin_lock(&tsk->sighand->siglock); + if (!fastpath_timer_check(tsk)) + return; + spin_lock(&tsk->sighand->siglock); /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. + * Here we take off tsk->signal->cpu_timers[N] and + * tsk->cpu_timers[N] all the timers that are firing, and + * put them on the firing list. */ check_thread_timers(tsk, &firing); check_process_timers(tsk, &firing); @@ -1318,7 +1430,6 @@ void run_posix_cpu_timers(struct task_struct *tsk) * spin until we've taken care of that timer below. */ spin_unlock(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); /* * Now that all the timers on our list have the firing flag, @@ -1327,29 +1438,28 @@ void run_posix_cpu_timers(struct task_struct *tsk) * timer call will interfere. */ list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { - int firing; + int cpu_firing; + spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.entry); - firing = timer->it.cpu.firing; + cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; /* * The firing flag is -1 if we collided with a reset * of the timer, which already reported this * almost-firing as an overrun. So don't generate an event. */ - if (likely(firing >= 0)) { + if (likely(cpu_firing >= 0)) cpu_timer_fire(timer); - } spin_unlock(&timer->it_lock); } } /* * Set one of the process-wide special case CPU timers. - * The tasklist_lock and tsk->sighand->siglock must be held by the caller. - * The oldval argument is null for the RLIMIT_CPU timer, where *newval is - * absolute; non-null for ITIMER_*, where *newval is relative and we update - * it to be absolute, *oldval is absolute and we update it to be relative. + * The tsk->sighand->siglock must be held by the caller. + * The *newval argument is relative and we update it to be absolute, *oldval + * is absolute and we update it to be relative. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) @@ -1358,13 +1468,13 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, struct list_head *head; BUG_ON(clock_idx == CPUCLOCK_SCHED); - cpu_clock_sample_group_locked(clock_idx, tsk, &now); + cpu_timer_sample_group(clock_idx, tsk, &now); if (oldval) { if (!cputime_eq(*oldval, cputime_zero)) { if (cputime_le(*oldval, now.cpu)) { /* Just about to fire. */ - *oldval = jiffies_to_cputime(1); + *oldval = cputime_one_jiffy; } else { *oldval = cputime_sub(*oldval, now.cpu); } @@ -1389,38 +1499,27 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, */ head = &tsk->signal->cpu_timers[clock_idx]; if (list_empty(head) || - cputime_ge(list_entry(head->next, + cputime_ge(list_first_entry(head, struct cpu_timer_list, entry)->expires.cpu, *newval)) { - /* - * Rejigger each thread's expiry time so that one will - * notice before we hit the process-cumulative expiry time. - */ - union cpu_time_count expires = { .sched = 0 }; - expires.cpu = *newval; - process_timer_rebalance(tsk, clock_idx, expires, now); + switch (clock_idx) { + case CPUCLOCK_PROF: + tsk->signal->cputime_expires.prof_exp = *newval; + break; + case CPUCLOCK_VIRT: + tsk->signal->cputime_expires.virt_exp = *newval; + break; + } } } -static long posix_cpu_clock_nanosleep_restart(struct restart_block *); - -int posix_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +static int do_cpu_nanosleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct itimerspec *it) { - struct restart_block *restart_block = - ¤t_thread_info()->restart_block; struct k_itimer timer; int error; /* - * Diagnose required errors first. - */ - if (CPUCLOCK_PERTHREAD(which_clock) && - (CPUCLOCK_PID(which_clock) == 0 || - CPUCLOCK_PID(which_clock) == current->pid)) - return -EINVAL; - - /* * Set up a temporary timer and then wait for it to go off. */ memset(&timer, 0, sizeof timer); @@ -1430,13 +1529,13 @@ int posix_cpu_nsleep(clockid_t which_clock, int flags, error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - struct timespec __user *rmtp; static struct itimerspec zero_it; - struct itimerspec it = { .it_value = *rqtp, - .it_interval = {} }; + + memset(it, 0, sizeof *it); + it->it_value = *rqtp; spin_lock_irq(&timer.it_lock); - error = posix_cpu_timer_set(&timer, flags, &it, NULL); + error = posix_cpu_timer_set(&timer, flags, it, NULL); if (error) { spin_unlock_irq(&timer.it_lock); return error; @@ -1464,55 +1563,102 @@ int posix_cpu_nsleep(clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, &it); + posix_cpu_timer_set(&timer, 0, &zero_it, it); spin_unlock_irq(&timer.it_lock); - if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. */ return 0; } + error = -ERESTART_RESTARTBLOCK; + } + + return error; +} + +int posix_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) +{ + struct restart_block *restart_block = + ¤t_thread_info()->restart_block; + struct itimerspec it; + int error; + + /* + * Diagnose required errors first. + */ + if (CPUCLOCK_PERTHREAD(which_clock) && + (CPUCLOCK_PID(which_clock) == 0 || + CPUCLOCK_PID(which_clock) == current->pid)) + return -EINVAL; + + error = do_cpu_nanosleep(which_clock, flags, rqtp, &it); + + if (error == -ERESTART_RESTARTBLOCK) { + + if (flags & TIMER_ABSTIME) + return -ERESTARTNOHAND; /* - * Report back to the user the time still remaining. - */ - rmtp = (struct timespec __user *) restart_block->arg1; - if (rmtp != NULL && !(flags & TIMER_ABSTIME) && - copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + * Report back to the user the time still remaining. + */ + if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; - restart_block->fn = posix_cpu_clock_nanosleep_restart; - /* Caller already set restart_block->arg1 */ + restart_block->fn = posix_cpu_nsleep_restart; restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; restart_block->arg2 = rqtp->tv_sec; restart_block->arg3 = rqtp->tv_nsec; - - error = -ERESTART_RESTARTBLOCK; } - return error; } -static long -posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) +long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; - struct timespec t = { .tv_sec = restart_block->arg2, - .tv_nsec = restart_block->arg3 }; + struct timespec __user *rmtp; + struct timespec t; + struct itimerspec it; + int error; + + rmtp = (struct timespec __user *) restart_block->arg1; + t.tv_sec = restart_block->arg2; + t.tv_nsec = restart_block->arg3; + restart_block->fn = do_no_restart_syscall; - return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t); + error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); + + if (error == -ERESTART_RESTARTBLOCK) { + /* + * Report back to the user the time still remaining. + */ + if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + return -EFAULT; + + restart_block->fn = posix_cpu_nsleep_restart; + restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; + restart_block->arg2 = t.tv_sec; + restart_block->arg3 = t.tv_nsec; + } + return error; + } #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) -static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } -static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } @@ -1521,16 +1667,23 @@ static int process_cpu_timer_create(struct k_itimer *timer) timer->it_clock = PROCESS_CLOCK; return posix_cpu_timer_create(timer); } -static int process_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +static int process_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, + struct timespec __user *rmtp) +{ + return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); +} +static long process_cpu_nsleep_restart(struct restart_block *restart_block) { - return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); + return -EINVAL; } -static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } -static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } @@ -1539,8 +1692,12 @@ static int thread_cpu_timer_create(struct k_itimer *timer) timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } -static int thread_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) +static int thread_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) +{ + return -EINVAL; +} +static long thread_cpu_nsleep_restart(struct restart_block *restart_block) { return -EINVAL; } @@ -1553,6 +1710,7 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, + .nsleep_restart = process_cpu_nsleep_restart, }; struct k_clock thread = { .clock_getres = thread_cpu_clock_getres, @@ -1560,11 +1718,17 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = thread_cpu_timer_create, .nsleep = thread_cpu_nsleep, + .nsleep_restart = thread_cpu_nsleep_restart, }; + struct timespec ts; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + cputime_to_timespec(cputime_one_jiffy, &ts); + onecputick = ts.tv_nsec; + WARN_ON(ts.tv_sec != 0); + return 0; } __initcall(init_posix_cpu_timers);