Merge branch 'linus' into timers/core
author Thomas Gleixner <tglx@linutronix.de>
Mon, 10 May 2010 09:59:37 +0000 (11:59 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Mon, 10 May 2010 12:20:42 +0000 (14:20 +0200)
Reason: Further posix_cpu_timer patches depend on mainline changes

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Documentation/feature-removal-schedule.txt
ipc/mqueue.c
kernel/posix-cpu-timers.c
kernel/time.c
kernel/time/timekeeping.c
kernel/timer.c

@@@ -564,16 -564,6 +564,16 @@@ Who:     Avi Kivity <avi@redhat.com
  
  ----------------------------
  
 +What: xtime, wall_to_monotonic
 +When: 2.6.36+
 +Files:        kernel/time/timekeeping.c include/linux/time.h
 +Why:  Cleaning up timekeeping internal values. Please use
 +      existing timekeeping accessor functions to access
 +      the equivalent functionality.
 +Who:  John Stultz <johnstul@us.ibm.com>
 +
 +----------------------------
 +
  What: KVM kernel-allocated memory slots
  When: July 2010
  Why:  Since 2.6.25, kvm supports user-allocated memory slots, which are
@@@ -599,3 -589,26 +599,26 @@@ Why:     Useful in 2003, implementation is 
        Generally invoked by accident today.
        Seen as doing more harm than good.
  Who:  Len Brown <len.brown@intel.com>
+ 
+ ----------------------------
+ 
+ What: video4linux /dev/vtx teletext API support
+ When: 2.6.35
+ Files:        drivers/media/video/saa5246a.c drivers/media/video/saa5249.c
+       include/linux/videotext.h
+ Why:  The vtx device nodes have been superseded by vbi device nodes
+       for many years. No applications exist that use the vtx support.
+       Of the two i2c drivers that actually support this API, the saa5249
+       has been impossible to use for a year now and no known hardware
+       that supports this device exists. The saa5246a is theoretically
+       supported by the old mxb boards, but it never actually worked.
+       In summary: there is no hardware that can use this API and there
+       are no applications actually implementing this API.
+       The vtx support still reserves minors 192-223 and we would really
+       like to reuse those for upcoming new functionality. In the unlikely
+       event that new hardware appears that wants to use the functionality
+       provided by the vtx API, then that functionality should be built
+       around the sliced VBI API instead.
+ Who:  Hans Verkuil <hverkuil@xs4all.nl>
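
The xtime/wall_to_monotonic removal entry above points callers at the
accessor functions; a minimal sketch of the replacement pattern in a
driver (my_driver_stamp is a hypothetical name):

    #include <linux/kernel.h>
    #include <linux/time.h>

    static void my_driver_stamp(void)
    {
            struct timespec wall, mono;

            getnstimeofday(&wall);  /* wall time, instead of reading xtime */
            ktime_get_ts(&mono);    /* monotonic time, instead of combining
                                     * xtime with wall_to_monotonic */

            pr_info("wall %ld.%09ld mono %ld.%09ld\n",
                    wall.tv_sec, wall.tv_nsec, mono.tv_sec, mono.tv_nsec);
    }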
diff --combined ipc/mqueue.c
@@@ -32,6 -32,7 +32,7 @@@
  #include <linux/nsproxy.h>
  #include <linux/pid.h>
  #include <linux/ipc_namespace.h>
+ #include <linux/slab.h>
  
  #include <net/sock.h>
  #include "util.h"
@@@ -428,7 -429,7 +429,7 @@@ static void wq_add(struct mqueue_inode_
   * sr: SEND or RECV
   */
  static int wq_sleep(struct mqueue_inode_info *info, int sr,
 -                      long timeout, struct ext_wait_queue *ewp)
 +                  ktime_t *timeout, struct ext_wait_queue *ewp)
  {
        int retval;
        signed long time;
                set_current_state(TASK_INTERRUPTIBLE);
  
                spin_unlock(&info->lock);
 -              time = schedule_timeout(timeout);
 +              time = schedule_hrtimeout_range_clock(timeout,
 +                  HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
  
                while (ewp->state == STATE_PENDING)
                        cpu_relax();
@@@ -552,16 -552,31 +553,16 @@@ static void __do_notify(struct mqueue_i
        wake_up(&info->wait_q);
  }
  
 -static long prepare_timeout(struct timespec *p)
 +static int prepare_timeout(const struct timespec __user *u_abs_timeout,
 +                         ktime_t *expires, struct timespec *ts)
  {
 -      struct timespec nowts;
 -      long timeout;
 -
 -      if (p) {
 -              if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
 -                      || p->tv_nsec >= NSEC_PER_SEC))
 -                      return -EINVAL;
 -              nowts = CURRENT_TIME;
 -              /* first subtract as jiffies can't be too big */
 -              p->tv_sec -= nowts.tv_sec;
 -              if (p->tv_nsec < nowts.tv_nsec) {
 -                      p->tv_nsec += NSEC_PER_SEC;
 -                      p->tv_sec--;
 -              }
 -              p->tv_nsec -= nowts.tv_nsec;
 -              if (p->tv_sec < 0)
 -                      return 0;
 -
 -              timeout = timespec_to_jiffies(p) + 1;
 -      } else
 -              return MAX_SCHEDULE_TIMEOUT;
 +      if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
 +              return -EFAULT;
 +      if (!timespec_valid(ts))
 +              return -EINVAL;
  
 -      return timeout;
 +      *expires = timespec_to_ktime(*ts);
 +      return 0;
  }
  
  static void remove_notification(struct mqueue_inode_info *info)
@@@ -847,21 -862,22 +848,21 @@@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mq
        struct ext_wait_queue *receiver;
        struct msg_msg *msg_ptr;
        struct mqueue_inode_info *info;
 -      struct timespec ts, *p = NULL;
 -      long timeout;
 +      ktime_t expires, *timeout = NULL;
 +      struct timespec ts;
        int ret;
  
        if (u_abs_timeout) {
 -              if (copy_from_user(&ts, u_abs_timeout, 
 -                                      sizeof(struct timespec)))
 -                      return -EFAULT;
 -              p = &ts;
 +              int res = prepare_timeout(u_abs_timeout, &expires, &ts);
 +              if (res)
 +                      return res;
 +              timeout = &expires;
        }
  
        if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
                return -EINVAL;
  
 -      audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
 -      timeout = prepare_timeout(p);
 +      audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
  
        filp = fget(mqdes);
        if (unlikely(!filp)) {
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
 -              } else if (unlikely(timeout < 0)) {
 -                      spin_unlock(&info->lock);
 -                      ret = timeout;
                } else {
                        wait.task = current;
                        wait.msg = (void *) msg_ptr;
@@@ -935,23 -954,24 +936,23 @@@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t
                size_t, msg_len, unsigned int __user *, u_msg_prio,
                const struct timespec __user *, u_abs_timeout)
  {
 -      long timeout;
        ssize_t ret;
        struct msg_msg *msg_ptr;
        struct file *filp;
        struct inode *inode;
        struct mqueue_inode_info *info;
        struct ext_wait_queue wait;
 -      struct timespec ts, *p = NULL;
 +      ktime_t expires, *timeout = NULL;
 +      struct timespec ts;
  
        if (u_abs_timeout) {
 -              if (copy_from_user(&ts, u_abs_timeout, 
 -                                      sizeof(struct timespec)))
 -                      return -EFAULT;
 -              p = &ts;
 +              int res = prepare_timeout(u_abs_timeout, &expires, &ts);
 +              if (res)
 +                      return res;
 +              timeout = &expires;
        }
  
 -      audit_mq_sendrecv(mqdes, msg_len, 0, p);
 -      timeout = prepare_timeout(p);
 +      audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
  
        filp = fget(mqdes);
        if (unlikely(!filp)) {
                if (filp->f_flags & O_NONBLOCK) {
                        spin_unlock(&info->lock);
                        ret = -EAGAIN;
 -                      msg_ptr = NULL;
 -              } else if (unlikely(timeout < 0)) {
 -                      spin_unlock(&info->lock);
 -                      ret = timeout;
 -                      msg_ptr = NULL;
                } else {
                        wait.task = current;
                        wait.state = STATE_NONE;
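
The mqueue timeout rework above switches wq_sleep() from relative
jiffies to an absolute CLOCK_REALTIME hrtimer, matching what the
syscall interface hands in; a userspace sketch of that contract
(error handling elided, link with -lrt):

    #include <mqueue.h>
    #include <time.h>

    static int send_with_deadline(mqd_t mq, const char *msg, size_t len)
    {
            struct timespec abs_timeout;

            clock_gettime(CLOCK_REALTIME, &abs_timeout);
            abs_timeout.tv_sec += 5;        /* give up 5 seconds from now */

            /* a tv_nsec outside [0, 1e9) is now rejected with EINVAL */
            return mq_timedsend(mq, msg, len, 0, &abs_timeout);
    }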
diff --combined kernel/posix-cpu-timers.c
  #include <trace/events/timer.h>
  
  /*
 - * Called after updating RLIMIT_CPU to set timer expiration if necessary.
 + * Called after updating RLIMIT_CPU to run the cpu timer and update
 + * the tsk->signal->cputime_expires expiration cache if necessary.
 + * Needs siglock protection since other code may update the
 + * expiration cache as well.
   */
  void update_rlimit_cpu(unsigned long rlim_new)
  {
        cputime_t cputime = secs_to_cputime(rlim_new);
 -      struct signal_struct *const sig = current->signal;
  
 -      if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
 -          cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
 -              spin_lock_irq(&current->sighand->siglock);
 -              set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 -              spin_unlock_irq(&current->sighand->siglock);
 -      }
 +      spin_lock_irq(&current->sighand->siglock);
 +      set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 +      spin_unlock_irq(&current->sighand->siglock);
  }
  
  static int check_clock(const clockid_t which_clock)
@@@ -547,62 -548,111 +547,62 @@@ static inline int expires_gt(cputime_t 
               cputime_gt(expires, new_exp);
  }
  
  /*
   * Insert the timer on the appropriate list before any timers that
   * expire later.  This must be called with the tasklist_lock held
 - * for reading, and interrupts disabled.
 + * for reading, with interrupts disabled and p->sighand->siglock taken.
   */
 -static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 +static void arm_timer(struct k_itimer *timer)
  {
        struct task_struct *p = timer->it.cpu.task;
        struct list_head *head, *listpos;
 +      struct task_cputime *cputime_expires;
        struct cpu_timer_list *const nt = &timer->it.cpu;
        struct cpu_timer_list *next;
 -      unsigned long i;
  
 -      head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 -              p->cpu_timers : p->signal->cpu_timers);
 +      if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 +              head = p->cpu_timers;
 +              cputime_expires = &p->cputime_expires;
 +      } else {
 +              head = p->signal->cpu_timers;
 +              cputime_expires = &p->signal->cputime_expires;
 +      }
        head += CPUCLOCK_WHICH(timer->it_clock);
  
 -      BUG_ON(!irqs_disabled());
 -      spin_lock(&p->sighand->siglock);
 -
        listpos = head;
 -      if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
 -              list_for_each_entry(next, head, entry) {
 -                      if (next->expires.sched > nt->expires.sched)
 -                              break;
 -                      listpos = &next->entry;
 -              }
 -      } else {
 -              list_for_each_entry(next, head, entry) {
 -                      if (cputime_gt(next->expires.cpu, nt->expires.cpu))
 -                              break;
 -                      listpos = &next->entry;
 -              }
 +      list_for_each_entry(next, head, entry) {
 +              if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
 +                      break;
 +              listpos = &next->entry;
        }
        list_add(&nt->entry, listpos);
  
        if (listpos == head) {
 +              union cpu_time_count *exp = &nt->expires;
 +
                /*
 -               * We are the new earliest-expiring timer.
 -               * If we are a thread timer, there can always
 -               * be a process timer telling us to stop earlier.
 +               * We are the new earliest-expiring POSIX 1.b timer, hence
 +               * we need to update the expiration cache. Note that for
 +               * process timers we share the expiration cache with itimers
 +               * and RLIMIT_CPU, and for thread timers with RLIMIT_RTTIME.
                 */
  
 -              if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 -                      union cpu_time_count *exp = &nt->expires;
 -
 -                      switch (CPUCLOCK_WHICH(timer->it_clock)) {
 -                      default:
 -                              BUG();
 -                      case CPUCLOCK_PROF:
 -                              if (expires_gt(p->cputime_expires.prof_exp,
 -                                             exp->cpu))
 -                                      p->cputime_expires.prof_exp = exp->cpu;
 -                              break;
 -                      case CPUCLOCK_VIRT:
 -                              if (expires_gt(p->cputime_expires.virt_exp,
 -                                             exp->cpu))
 -                                      p->cputime_expires.virt_exp = exp->cpu;
 -                              break;
 -                      case CPUCLOCK_SCHED:
 -                              if (p->cputime_expires.sched_exp == 0 ||
 -                                  p->cputime_expires.sched_exp > exp->sched)
 -                                      p->cputime_expires.sched_exp =
 -                                                              exp->sched;
 -                              break;
 -                      }
 -              } else {
 -                      struct signal_struct *const sig = p->signal;
 -                      union cpu_time_count *exp = &timer->it.cpu.expires;
 -
 -                      /*
 -                       * For a process timer, set the cached expiration time.
 -                       */
 -                      switch (CPUCLOCK_WHICH(timer->it_clock)) {
 -                      default:
 -                              BUG();
 -                      case CPUCLOCK_VIRT:
 -                              if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
 -                                             exp->cpu))
 -                                      break;
 -                              sig->cputime_expires.virt_exp = exp->cpu;
 -                              break;
 -                      case CPUCLOCK_PROF:
 -                              if (expires_le(sig->it[CPUCLOCK_PROF].expires,
 -                                             exp->cpu))
 -                                      break;
 -                              i = sig->rlim[RLIMIT_CPU].rlim_cur;
 -                              if (i != RLIM_INFINITY &&
 -                                  i <= cputime_to_secs(exp->cpu))
 -                                      break;
 -                              sig->cputime_expires.prof_exp = exp->cpu;
 -                              break;
 -                      case CPUCLOCK_SCHED:
 -                              sig->cputime_expires.sched_exp = exp->sched;
 -                              break;
 -                      }
 +              switch (CPUCLOCK_WHICH(timer->it_clock)) {
 +              case CPUCLOCK_PROF:
 +                      if (expires_gt(cputime_expires->prof_exp, exp->cpu))
 +                              cputime_expires->prof_exp = exp->cpu;
 +                      break;
 +              case CPUCLOCK_VIRT:
 +                      if (expires_gt(cputime_expires->virt_exp, exp->cpu))
 +                              cputime_expires->virt_exp = exp->cpu;
 +                      break;
 +              case CPUCLOCK_SCHED:
 +                      if (cputime_expires->sched_exp == 0 ||
 +                          cputime_expires->sched_exp > exp->sched)
 +                              cputime_expires->sched_exp = exp->sched;
 +                      break;
                }
        }
 -
 -      spin_unlock(&p->sighand->siglock);
  }
  
  /*
   */
  static void cpu_timer_fire(struct k_itimer *timer)
  {
 -      if (unlikely(timer->sigq == NULL)) {
 +      if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 +              /*
 +               * User doesn't want any signal.
 +               */
 +              timer->it.cpu.expires.sched = 0;
 +      } else if (unlikely(timer->sigq == NULL)) {
                /*
                 * This a special case for clock_nanosleep,
                 * not a normal timer from sys_timer_create.
@@@ -676,7 -721,7 +676,7 @@@ int posix_cpu_timer_set(struct k_itime
                        struct itimerspec *new, struct itimerspec *old)
  {
        struct task_struct *p = timer->it.cpu.task;
 -      union cpu_time_count old_expires, new_expires, val;
 +      union cpu_time_count old_expires, new_expires, old_incr, val;
        int ret;
  
        if (unlikely(p == NULL)) {
        BUG_ON(!irqs_disabled());
  
        ret = 0;
 +      old_incr = timer->it.cpu.incr;
        spin_lock(&p->sighand->siglock);
        old_expires = timer->it.cpu.expires;
        if (unlikely(timer->it.cpu.firing)) {
                ret = TIMER_RETRY;
        } else
                list_del_init(&timer->it.cpu.entry);
 -      spin_unlock(&p->sighand->siglock);
  
        /*
         * We need to sample the current value to convert the new
                 * disable this firing since we are already reporting
                 * it as an overrun (thanks to bump_cpu_timer above).
                 */
 +              spin_unlock(&p->sighand->siglock);
                read_unlock(&tasklist_lock);
                goto out;
        }
         */
        timer->it.cpu.expires = new_expires;
        if (new_expires.sched != 0 &&
 -          (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
            cpu_time_before(timer->it_clock, val, new_expires)) {
 -              arm_timer(timer, val);
 +              arm_timer(timer);
        }
  
 +      spin_unlock(&p->sighand->siglock);
        read_unlock(&tasklist_lock);
  
        /*
        timer->it_overrun = -1;
  
        if (new_expires.sched != 0 &&
 -          (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
            !cpu_time_before(timer->it_clock, val, new_expires)) {
                /*
                 * The designated time already passed, so we notify
   out:
        if (old) {
                sample_to_timespec(timer->it_clock,
 -                                 timer->it.cpu.incr, &old->it_interval);
 +                                 old_incr, &old->it_interval);
        }
        return ret;
  }
@@@ -882,6 -927,25 +882,6 @@@ void posix_cpu_timer_get(struct k_itime
                read_unlock(&tasklist_lock);
        }
  
 -      if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 -              if (timer->it.cpu.incr.sched == 0 &&
 -                  cpu_time_before(timer->it_clock,
 -                                  timer->it.cpu.expires, now)) {
 -                      /*
 -                       * Do-nothing timer expired and has no reload,
 -                       * so it's as if it was never set.
 -                       */
 -                      timer->it.cpu.expires.sched = 0;
 -                      itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 -                      return;
 -              }
 -              /*
 -               * Account for any expirations and reloads that should
 -               * have happened.
 -               */
 -              bump_cpu_timer(timer, now);
 -      }
 -
        if (unlikely(clear_dead)) {
                /*
                 * We've noticed that the thread is dead, but
@@@ -997,9 -1061,9 +997,9 @@@ static void check_thread_timers(struct 
        }
  }
  
- static void stop_process_timers(struct task_struct *tsk)
+ static void stop_process_timers(struct signal_struct *sig)
  {
-       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+       struct thread_group_cputimer *cputimer = &sig->cputimer;
        unsigned long flags;
  
        if (!cputimer->running)
        spin_lock_irqsave(&cputimer->lock, flags);
        cputimer->running = 0;
        spin_unlock_irqrestore(&cputimer->lock, flags);
+       sig->cputime_expires.prof_exp = cputime_zero;
+       sig->cputime_expires.virt_exp = cputime_zero;
+       sig->cputime_expires.sched_exp = 0;
  }
  
  static u32 onecputick;
@@@ -1069,7 -1137,7 +1073,7 @@@ static void check_process_timers(struc
            list_empty(&timers[CPUCLOCK_VIRT]) &&
            cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
            list_empty(&timers[CPUCLOCK_SCHED])) {
-               stop_process_timers(tsk);
+               stop_process_timers(sig);
                return;
        }
  
@@@ -1202,7 -1270,6 +1206,7 @@@ void posix_cpu_timer_schedule(struct k_
                        goto out;
                }
                read_lock(&tasklist_lock); /* arm_timer needs it.  */
 +              spin_lock(&p->sighand->siglock);
        } else {
                read_lock(&tasklist_lock);
                if (unlikely(p->signal == NULL)) {
                        clear_dead_task(timer, now);
                        goto out_unlock;
                }
 +              spin_lock(&p->sighand->siglock);
                cpu_timer_sample_group(timer->it_clock, p, &now);
                bump_cpu_timer(timer, now);
                /* Leave the tasklist_lock locked for the call below.  */
        /*
         * Now re-arm for the new expiry time.
         */
 -      arm_timer(timer, now);
 +      BUG_ON(!irqs_disabled());
 +      arm_timer(timer);
 +      spin_unlock(&p->sighand->siglock);
  
  out_unlock:
        read_unlock(&tasklist_lock);
@@@ -1326,7 -1390,7 +1330,7 @@@ static inline int fastpath_timer_check(
                        return 1;
        }
  
 -      return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
 +      return 0;
  }
  
  /*
@@@ -1392,23 -1456,21 +1396,23 @@@ void run_posix_cpu_timers(struct task_s
  }
  
  /*
 - * Set one of the process-wide special case CPU timers.
 + * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
   * The tsk->sighand->siglock must be held by the caller.
 - * The *newval argument is relative and we update it to be absolute, *oldval
 - * is absolute and we update it to be relative.
   */
  void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
                           cputime_t *newval, cputime_t *oldval)
  {
        union cpu_time_count now;
 -      struct list_head *head;
  
        BUG_ON(clock_idx == CPUCLOCK_SCHED);
        cpu_timer_sample_group(clock_idx, tsk, &now);
  
        if (oldval) {
 +              /*
 +               * We are setting the itimer. The *oldval argument is absolute
 +               * and we update it to be relative; the *newval argument is
 +               * relative and we update it to be absolute.
 +               */
                if (!cputime_eq(*oldval, cputime_zero)) {
                        if (cputime_le(*oldval, now.cpu)) {
                                /* Just about to fire. */
                if (cputime_eq(*newval, cputime_zero))
                        return;
                *newval = cputime_add(*newval, now.cpu);
        }
  
        /*
 -       * Check whether there are any process timers already set to fire
 -       * before this one.  If so, we don't have anything more to do.
 +       * Update the expiration cache if we are the earliest timer, or if
 +       * the RLIMIT_CPU limit expires earlier than the prof_exp cpu timer.
         */
 -      head = &tsk->signal->cpu_timers[clock_idx];
 -      if (list_empty(head) ||
 -          cputime_ge(list_first_entry(head,
 -                                struct cpu_timer_list, entry)->expires.cpu,
 -                     *newval)) {
 -              switch (clock_idx) {
 -              case CPUCLOCK_PROF:
 +      switch (clock_idx) {
 +      case CPUCLOCK_PROF:
 +              if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
                        tsk->signal->cputime_expires.prof_exp = *newval;
 -                      break;
 -              case CPUCLOCK_VIRT:
 +              break;
 +      case CPUCLOCK_VIRT:
 +              if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
                        tsk->signal->cputime_expires.virt_exp = *newval;
 -                      break;
 -              }
 +              break;
        }
  }
  
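
The SIGEV_NONE handling that moves from posix_cpu_timer_get() into
cpu_timer_fire() above is exercised by timers created like this
(userspace sketch, error handling elided, link with -lrt):

    #include <signal.h>
    #include <time.h>

    static int arm_cpu_budget(timer_t *id)
    {
            struct sigevent sev = { .sigev_notify = SIGEV_NONE };
            struct itimerspec its = {
                    .it_value.tv_sec = 10, /* fires after 10s of CPU time */
            };                             /* it_interval zero: one-shot */

            if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, id))
                    return -1;
            /* no signal is delivered; poll the budget with timer_gettime() */
            return timer_settime(*id, 0, &its, NULL);
    }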
diff --combined kernel/time.c
@@@ -35,7 -35,6 +35,6 @@@
  #include <linux/syscalls.h>
  #include <linux/security.h>
  #include <linux/fs.h>
- #include <linux/slab.h>
  #include <linux/math64.h>
  #include <linux/ptrace.h>
  
@@@ -133,11 -132,12 +132,11 @@@ SYSCALL_DEFINE2(gettimeofday, struct ti
   */
  static inline void warp_clock(void)
  {
 -      write_seqlock_irq(&xtime_lock);
 -      wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
 -      xtime.tv_sec += sys_tz.tz_minuteswest * 60;
 -      update_xtime_cache(0);
 -      write_sequnlock_irq(&xtime_lock);
 -      clock_was_set();
 +      struct timespec delta, adjust;
 +      delta.tv_sec = sys_tz.tz_minuteswest * 60;
 +      delta.tv_nsec = 0;
 +      adjust = timespec_add_safe(current_kernel_time(), delta);
 +      do_settimeofday(&adjust);
  }
  
  /*
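
The warp_clock() rewrite above reduces the timezone warp to an ordinary
do_settimeofday() call; from userspace it is reached roughly like this
(a sketch, assuming a machine whose CMOS clock runs in local time):

    #include <stddef.h>
    #include <sys/time.h>

    static int set_tz_and_warp(void)
    {
            /* two hours east of Greenwich */
            struct timezone tz = { .tz_minuteswest = -120, .tz_dsttime = 0 };

            /* the first settimeofday() with a tz but a NULL tv warps xtime */
            return settimeofday(NULL, &tz);
    }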
diff --combined kernel/time/timekeeping.c
@@@ -165,6 -165,13 +165,6 @@@ struct timespec raw_time
  /* flag for if timekeeping is suspended */
  int __read_mostly timekeeping_suspended;
  
 -static struct timespec xtime_cache __attribute__ ((aligned (16)));
 -void update_xtime_cache(u64 nsec)
 -{
 -      xtime_cache = xtime;
 -      timespec_add_ns(&xtime_cache, nsec);
 -}
 -
  /* must hold xtime_lock */
  void timekeeping_leap_insert(int leapsecond)
  {
@@@ -325,6 -332,8 +325,6 @@@ int do_settimeofday(struct timespec *tv
  
        xtime = *tv;
  
 -      update_xtime_cache(0);
 -
        timekeeper.ntp_error = 0;
        ntp_clear();
  
@@@ -550,6 -559,7 +550,6 @@@ void __init timekeeping_init(void
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
 -      update_xtime_cache(0);
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
@@@ -583,6 -593,7 +583,6 @@@ static int timekeeping_resume(struct sy
                wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                total_sleep_time = timespec_add_safe(total_sleep_time, ts);
        }
 -      update_xtime_cache(0);
        /* re-base the last cycle value */
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
@@@ -777,6 -788,7 +777,6 @@@ void update_wall_time(void
  {
        struct clocksource *clock;
        cycle_t offset;
 -      u64 nsecs;
        int shift = 0, maxshift;
  
        /* Make sure we're fully resumed: */
        shift = min(shift, maxshift);
        while (offset >= timekeeper.cycle_interval) {
                offset = logarithmic_accumulation(offset, shift);
-               shift--;
+               if (offset < timekeeper.cycle_interval << shift)
+                       shift--;
        }
  
        /* correct the clock when NTP error is too big */
                timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
        }
  
 -      /* store full nanoseconds into xtime after rounding it up and
 +
 +      /*
 +       * Store full nanoseconds into xtime after rounding it up and
         * add the remainder to the error difference.
         */
        xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
  
 -      nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
 -      update_xtime_cache(nsecs);
 +      /*
 +       * Finally, make sure that after the rounding
 +       * xtime.tv_nsec isn't larger than NSEC_PER_SEC
 +       */
 +      if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
 +              xtime.tv_nsec -= NSEC_PER_SEC;
 +              xtime.tv_sec++;
 +              second_overflow();
 +      }
  
        /* check to see if there is a new clocksource to use */
        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@@ -892,13 -896,13 +893,13 @@@ EXPORT_SYMBOL_GPL(monotonic_to_bootbase
  
  unsigned long get_seconds(void)
  {
 -      return xtime_cache.tv_sec;
 +      return xtime.tv_sec;
  }
  EXPORT_SYMBOL(get_seconds);
  
  struct timespec __current_kernel_time(void)
  {
 -      return xtime_cache;
 +      return xtime;
  }
  
  struct timespec current_kernel_time(void)
        do {
                seq = read_seqbegin(&xtime_lock);
  
 -              now = xtime_cache;
 +              now = xtime;
        } while (read_seqretry(&xtime_lock, seq));
  
        return now;
@@@ -924,7 -928,7 +925,7 @@@ struct timespec get_monotonic_coarse(vo
        do {
                seq = read_seqbegin(&xtime_lock);
  
 -              now = xtime_cache;
 +              now = xtime;
                mono = wall_to_monotonic;
        } while (read_seqretry(&xtime_lock, seq));
  
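With xtime_cache removed, the coarse time accessors above snapshot
xtime itself under xtime_lock; the underlying seqlock pairing looks
roughly like this (a sketch of the pattern, not the exact kernel code):

    struct timespec now;
    unsigned long flags, seq;

    /* writer side, e.g. the update_wall_time() path */
    write_seqlock_irqsave(&xtime_lock, flags);
    /* ... advance xtime ... */
    write_sequnlock_irqrestore(&xtime_lock, flags);

    /* reader side: retry the snapshot if a writer interleaved */
    do {
            seq = read_seqbegin(&xtime_lock);
            now = xtime;
    } while (read_seqretry(&xtime_lock, seq));
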
diff --combined kernel/timer.c
@@@ -39,6 -39,7 +39,7 @@@
  #include <linux/kallsyms.h>
  #include <linux/perf_event.h>
  #include <linux/sched.h>
+ #include <linux/slab.h>
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
@@@ -318,24 -319,6 +319,24 @@@ unsigned long round_jiffies_up_relative
  }
  EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
  
 +/**
 + * set_timer_slack - set the allowed slack for a timer
 + * @timer: the timer to be modified
 + * @slack_hz: the amount of time (in jiffies) allowed for rounding
 + *
 + * Set the amount of slack, in jiffies, that a certain timer is
 + * allowed. By setting this value, the timer subsystem will schedule
 + * the actual timer somewhere between the time mod_timer() asks for,
 + * and that time plus the slack.
 + *
 + * By setting the slack to -1, a percentage (roughly 0.4%) of the
 + * delay is used instead.
 + */
 +void set_timer_slack(struct timer_list *timer, int slack_hz)
 +{
 +      timer->slack = slack_hz;
 +}
 +EXPORT_SYMBOL_GPL(set_timer_slack);
 +
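
A sketch of how a driver might use the new hook (my_timer,
my_timeout_fn and start_lazy_timer are hypothetical names):

    static struct timer_list my_timer;

    static void my_timeout_fn(unsigned long data)
    {
            /* ... timeout handling ... */
    }

    static void start_lazy_timer(void)
    {
            setup_timer(&my_timer, my_timeout_fn, 0);
            /* let the expiry slip by up to 100ms so it can coalesce */
            set_timer_slack(&my_timer, msecs_to_jiffies(100));
            mod_timer(&my_timer, jiffies + HZ);
    }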
  
  static inline void set_running_timer(struct tvec_base *base,
                                        struct timer_list *timer)
@@@ -567,7 -550,6 +568,7 @@@ static void __init_timer(struct timer_l
  {
        timer->entry.next = NULL;
        timer->base = __raw_get_cpu_var(tvec_bases);
 +      timer->slack = -1;
  #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
        timer->start_pid = -1;
@@@ -733,41 -715,6 +734,41 @@@ int mod_timer_pending(struct timer_lis
  }
  EXPORT_SYMBOL(mod_timer_pending);
  
 +/*
 + * Decide where to put the timer while taking the slack into account
 + *
 + * Algorithm:
 + *   1) calculate the maximum (absolute) time
 +  *   2) calculate the highest bit where expires and the new maximum differ
 + *   3) use this bit to make a mask
 + *   4) use the bitmask to round down the maximum time, so that all last
 + *      bits are zeros
 + */
 +static inline
 +unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 +{
 +      unsigned long expires_limit, mask;
 +      int bit;
 +
 +      expires_limit = expires + timer->slack;
 +
 +      if (timer->slack < 0) /* auto slack: use 0.4% */
 +              expires_limit = expires + (expires - jiffies)/256;
 +
 +      mask = expires ^ expires_limit;
 +
 +      if (mask == 0)
 +              return expires;
 +
 +      bit = find_last_bit(&mask, BITS_PER_LONG);
 +
 +      mask = (1UL << bit) - 1;
 +
 +      expires_limit = expires_limit & ~(mask);
 +
 +      return expires_limit;
 +}
 +
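
A worked instance of the four steps above, with illustrative numbers:

    /*
     *   expires       = 0x4a0 (1184), timer->slack = 100
     *   expires_limit = 0x4a0 + 100 = 0x504 (1284)
     *   mask          = 0x4a0 ^ 0x504 = 0x1a4, highest set bit = bit 8
     *   mask          = (1UL << 8) - 1 = 0xff
     *   result        = 0x504 & ~0xff  = 0x500 (1280)
     *
     * Any other timer whose rounded expiry is also 0x500 can now be
     * served from the same tick.
     */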
  /**
   * mod_timer - modify a timer's timeout
   * @timer: the timer to be modified
@@@ -798,8 -745,6 +799,8 @@@ int mod_timer(struct timer_list *timer
        if (timer_pending(timer) && timer->expires == expires)
                return 1;
  
 +      expires = apply_slack(timer, expires);
 +
        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
  }
  EXPORT_SYMBOL(mod_timer);
@@@ -936,6 -881,7 +937,7 @@@ int try_to_del_timer_sync(struct timer_
        if (base->running_timer == timer)
                goto out;
  
+       timer_stats_timer_clear_start_info(timer);
        ret = 0;
        if (timer_pending(timer)) {
                detach_timer(timer, 1);
@@@ -1009,47 -955,6 +1011,47 @@@ static int cascade(struct tvec_base *ba
        return index;
  }
  
 +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 +                        unsigned long data)
 +{
 +      int preempt_count = preempt_count();
 +
 +#ifdef CONFIG_LOCKDEP
 +      /*
 +       * It is permissible to free the timer from inside the
 +       * function that is called from it; we need to take this
 +       * into account for lockdep too. To avoid bogus "held lock
 +       * freed" warnings as well as problems when looking into
 +       * timer->lockdep_map, make a copy and use that here.
 +       */
 +      struct lockdep_map lockdep_map = timer->lockdep_map;
 +#endif
 +      /*
 +       * Couple the lock chain with the lock chain at
 +       * del_timer_sync() by acquiring the lock_map around the fn()
 +       * call here and in del_timer_sync().
 +       */
 +      lock_map_acquire(&lockdep_map);
 +
 +      trace_timer_expire_entry(timer);
 +      fn(data);
 +      trace_timer_expire_exit(timer);
 +
 +      lock_map_release(&lockdep_map);
 +
 +      if (preempt_count != preempt_count()) {
 +              WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
 +                        fn, preempt_count, preempt_count());
 +              /*
 +               * Restore the preempt count. That gives us a decent
 +               * chance to survive and extract information. If the
 +               * callback kept a lock held, bad luck, but not worse
 +               * than the BUG() we had.
 +               */
 +              preempt_count() = preempt_count;
 +      }
 +}
 +
  #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
  
  /**
@@@ -1093,7 -998,45 +1095,7 @@@ static inline void __run_timers(struct 
                        detach_timer(timer, 1);
  
                        spin_unlock_irq(&base->lock);
 -                      {
 -                              int preempt_count = preempt_count();
 -
 -#ifdef CONFIG_LOCKDEP
 -                              /*
 -                               * It is permissible to free the timer from
 -                               * inside the function that is called from
 -                               * it, this we need to take into account for
 -                               * lockdep too. To avoid bogus "held lock
 -                               * freed" warnings as well as problems when
 -                               * looking into timer->lockdep_map, make a
 -                               * copy and use that here.
 -                               */
 -                              struct lockdep_map lockdep_map =
 -                                      timer->lockdep_map;
 -#endif
 -                              /*
 -                               * Couple the lock chain with the lock chain at
 -                               * del_timer_sync() by acquiring the lock_map
 -                               * around the fn() call here and in
 -                               * del_timer_sync().
 -                               */
 -                              lock_map_acquire(&lockdep_map);
 -
 -                              trace_timer_expire_entry(timer);
 -                              fn(data);
 -                              trace_timer_expire_exit(timer);
 -
 -                              lock_map_release(&lockdep_map);
 -
 -                              if (preempt_count != preempt_count()) {
 -                                      printk(KERN_ERR "huh, entered %p "
 -                                             "with preempt_count %08x, exited"
 -                                             " with %08x?\n",
 -                                             fn, preempt_count,
 -                                             preempt_count());
 -                                      BUG();
 -                              }
 -                      }
 +                      call_timer_fn(timer, fn, data);
                        spin_lock_irq(&base->lock);
                }
        }