sched, x86: clean up hrtick implementation
authorPeter Zijlstra <peterz@infradead.org>
Fri, 18 Jul 2008 16:01:23 +0000 (18:01 +0200)
committerIngo Molnar <mingo@elte.hu>
Sun, 20 Jul 2008 08:37:28 +0000 (10:37 +0200)
random uvesafb failures were reported against Gentoo:

  http://bugs.gentoo.org/show_bug.cgi?id=222799

and Mihai Moldovan bisected it back to:

8f4d37ec073c17e2d4aa8851df5837d798606d6f is first bad commit
> commit 8f4d37ec073c17e2d4aa8851df5837d798606d6f
> Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date:   Fri Jan 25 21:08:29 2008 +0100
>
>    sched: high-res preemption tick

Linus suspected it to be hrtick + vm86 interaction and observed:

> Btw, Peter, Ingo: I think that commit is doing bad things. They aren't
> _incorrect_ per se, but they are definitely bad.
>
> Why?
>
> Using random _TIF_WORK_MASK flags is really impolite for doing
> "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S
> special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of
> vm86 mode unnecessarily.
>
> See the "work_notifysig_v86" label, and how it does that
> "save_v86_state()" thing etc etc.

Right, I never liked having to fiddle with those TIF flags. Initially I
needed it because the hrtimer base lock could not nest in the rq lock.
That however is fixed these days.

Currently the only reason left to fiddle with the TIF flags is remote
wakeups. We cannot program a remote cpu's hrtimer. I've been thinking
about using the new and improved IPI function call stuff to implement
hrtimer_start_on().

However that does require that smp_call_function_single(.wait=0) works
from interrupt context - /me looks at the latest series from Jens - Yes
that does seem to be supported, good.

Here's a stab at cleaning this stuff up ...

Mihai reported test success as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Mihai Moldovan <ionic@ionic.de>
Cc: Michal Januszewski <spock@gentoo.org>
Cc: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/signal_32.c
arch/x86/kernel/signal_64.c
include/asm-x86/thread_info.h
kernel/Kconfig.hz
kernel/sched.c
kernel/sched_fair.c

index d923736..e1fc7bd 100644 (file)
@@ -667,8 +667,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs);
 
-       if (thread_info_flags & _TIF_HRTICK_RESCHED)
-               hrtick_resched();
-
        clear_thread_flag(TIF_IRET);
 }
index e53b267..88023fc 100644 (file)
@@ -502,9 +502,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
        /* deal with pending signal delivery */
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs);
-
-       if (thread_info_flags & _TIF_HRTICK_RESCHED)
-               hrtick_resched();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
index 895339d..d701263 100644 (file)
@@ -81,7 +81,6 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
 #define TIF_SECCOMP            8       /* secure computing */
 #define TIF_MCE_NOTIFY         10      /* notify userspace of an MCE */
-#define TIF_HRTICK_RESCHED     11      /* reprogram hrtick timer */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_IA32               17      /* 32bit process */
 #define TIF_FORK               18      /* ret_from_fork */
@@ -108,7 +107,6 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY                (1 << TIF_MCE_NOTIFY)
-#define _TIF_HRTICK_RESCHED    (1 << TIF_HRTICK_RESCHED)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_IA32              (1 << TIF_IA32)
 #define _TIF_FORK              (1 << TIF_FORK)
@@ -132,7 +130,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK                                            \
-       (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
+       (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW                                                        \
index 526128a..2a202a8 100644 (file)
@@ -55,4 +55,4 @@ config HZ
        default 1000 if HZ_1000
 
 config SCHED_HRTICK
-       def_bool HIGH_RES_TIMERS && X86
+       def_bool HIGH_RES_TIMERS
index 1ee18db..c13c75e 100644 (file)
@@ -571,8 +571,10 @@ struct rq {
 #endif
 
 #ifdef CONFIG_SCHED_HRTICK
-       unsigned long hrtick_flags;
-       ktime_t hrtick_expire;
+#ifdef CONFIG_SMP
+       int hrtick_csd_pending;
+       struct call_single_data hrtick_csd;
+#endif
        struct hrtimer hrtick_timer;
 #endif
 
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
        return rq;
 }
 
-static void __resched_task(struct task_struct *p, int tif_bit);
-
-static inline void resched_task(struct task_struct *p)
-{
-       __resched_task(p, TIF_NEED_RESCHED);
-}
-
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
  * When we get rescheduled we reprogram the hrtick_timer outside of the
  * rq->lock.
  */
-static inline void resched_hrt(struct task_struct *p)
-{
-       __resched_task(p, TIF_HRTICK_RESCHED);
-}
-
-static inline void resched_rq(struct rq *rq)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&rq->lock, flags);
-       resched_task(rq->curr);
-       spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-enum {
-       HRTICK_SET,             /* re-programm hrtick_timer */
-       HRTICK_RESET,           /* not a new slice */
-       HRTICK_BLOCK,           /* stop hrtick operations */
-};
 
 /*
  * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
 {
        if (!sched_feat(HRTICK))
                return 0;
-       if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+       if (!cpu_online(cpu_of(rq)))
                return 0;
        return hrtimer_is_hres_active(&rq->hrtick_timer);
 }
 
-/*
- * Called to set the hrtick timer state.
- *
- * called with rq->lock held and irqs disabled
- */
-static void hrtick_start(struct rq *rq, u64 delay, int reset)
-{
-       assert_spin_locked(&rq->lock);
-
-       /*
-        * preempt at: now + delay
-        */
-       rq->hrtick_expire =
-               ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
-       /*
-        * indicate we need to program the timer
-        */
-       __set_bit(HRTICK_SET, &rq->hrtick_flags);
-       if (reset)
-               __set_bit(HRTICK_RESET, &rq->hrtick_flags);
-
-       /*
-        * New slices are called from the schedule path and don't need a
-        * forced reschedule.
-        */
-       if (reset)
-               resched_hrt(rq->curr);
-}
-
 static void hrtick_clear(struct rq *rq)
 {
        if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
 }
 
 /*
- * Update the timer from the possible pending state.
- */
-static void hrtick_set(struct rq *rq)
-{
-       ktime_t time;
-       int set, reset;
-       unsigned long flags;
-
-       WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-
-       spin_lock_irqsave(&rq->lock, flags);
-       set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
-       reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
-       time = rq->hrtick_expire;
-       clear_thread_flag(TIF_HRTICK_RESCHED);
-       spin_unlock_irqrestore(&rq->lock, flags);
-
-       if (set) {
-               hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
-               if (reset && !hrtimer_active(&rq->hrtick_timer))
-                       resched_rq(rq);
-       } else
-               hrtick_clear(rq);
-}
-
-/*
  * High-resolution timer tick.
  * Runs from hardirq context with interrupts disabled.
  */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 }
 
 #ifdef CONFIG_SMP
-static void hotplug_hrtick_disable(int cpu)
+/*
+ * called from hardirq (IPI) context
+ */
+static void __hrtick_start(void *arg)
 {
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
-
-       spin_lock_irqsave(&rq->lock, flags);
-       rq->hrtick_flags = 0;
-       __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-       spin_unlock_irqrestore(&rq->lock, flags);
+       struct rq *rq = arg;
 
-       hrtick_clear(rq);
+       spin_lock(&rq->lock);
+       hrtimer_restart(&rq->hrtick_timer);
+       rq->hrtick_csd_pending = 0;
+       spin_unlock(&rq->lock);
 }
 
-static void hotplug_hrtick_enable(int cpu)
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
 {
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
+       struct hrtimer *timer = &rq->hrtick_timer;
+       ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
 
-       spin_lock_irqsave(&rq->lock, flags);
-       __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-       spin_unlock_irqrestore(&rq->lock, flags);
+       timer->expires = time;
+
+       if (rq == this_rq()) {
+               hrtimer_restart(timer);
+       } else if (!rq->hrtick_csd_pending) {
+               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+               rq->hrtick_csd_pending = 1;
+       }
 }
 
 static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
        case CPU_DOWN_PREPARE_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               hotplug_hrtick_disable(cpu);
-               return NOTIFY_OK;
-
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               hotplug_hrtick_enable(cpu);
+               hrtick_clear(cpu_rq(cpu));
                return NOTIFY_OK;
        }
 
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
 {
        hotcpu_notifier(hotplug_hrtick, 0);
 }
-#endif /* CONFIG_SMP */
+#else
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
+{
+       hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+}
 
-static void init_rq_hrtick(struct rq *rq)
+static void init_hrtick(void)
 {
-       rq->hrtick_flags = 0;
-       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
+#endif /* CONFIG_SMP */
 
-void hrtick_resched(void)
+static void init_rq_hrtick(struct rq *rq)
 {
-       struct rq *rq;
-       unsigned long flags;
+#ifdef CONFIG_SMP
+       rq->hrtick_csd_pending = 0;
 
-       if (!test_thread_flag(TIF_HRTICK_RESCHED))
-               return;
+       rq->hrtick_csd.flags = 0;
+       rq->hrtick_csd.func = __hrtick_start;
+       rq->hrtick_csd.info = rq;
+#endif
 
-       local_irq_save(flags);
-       rq = cpu_rq(smp_processor_id());
-       hrtick_set(rq);
-       local_irq_restore(flags);
+       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       rq->hrtick_timer.function = hrtick;
+       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
 {
 }
 
-static inline void hrtick_set(struct rq *rq)
-{
-}
-
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
 
-void hrtick_resched(void)
-{
-}
-
 static inline void init_hrtick(void)
 {
 }
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
        int cpu;
 
        assert_spin_locked(&task_rq(p)->lock);
 
-       if (unlikely(test_tsk_thread_flag(p, tif_bit)))
+       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
                return;
 
-       set_tsk_thread_flag(p, tif_bit);
+       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
        cpu = task_cpu(p);
        if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
 #endif /* CONFIG_NO_HZ */
 
 #else /* !CONFIG_SMP */
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
        assert_spin_locked(&task_rq(p)->lock);
-       set_tsk_thread_flag(p, tif_bit);
+       set_tsk_need_resched(p);
 }
 #endif /* CONFIG_SMP */
 
@@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
        struct task_struct *prev, *next;
        unsigned long *switch_count;
        struct rq *rq;
-       int cpu, hrtick = sched_feat(HRTICK);
+       int cpu;
 
 need_resched:
        preempt_disable();
@@ -4410,7 +4331,7 @@ need_resched_nonpreemptible:
 
        schedule_debug(prev);
 
-       if (hrtick)
+       if (sched_feat(HRTICK))
                hrtick_clear(rq);
 
        /*
@@ -4457,9 +4378,6 @@ need_resched_nonpreemptible:
        } else
                spin_unlock_irq(&rq->lock);
 
-       if (hrtick)
-               hrtick_set(rq);
-
        if (unlikely(reacquire_kernel_lock(current) < 0))
                goto need_resched_nonpreemptible;
 
index f2aa987..6893b3e 100644 (file)
@@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 #ifdef CONFIG_SCHED_HRTICK
 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
-       int requeue = rq->curr == p;
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
@@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                 * Don't schedule slices shorter than 10000ns, that just
                 * doesn't make sense. Rely on vruntime for fairness.
                 */
-               if (!requeue)
+               if (rq->curr != p)
                        delta = max(10000LL, delta);
 
-               hrtick_start(rq, delta, requeue);
+               hrtick_start(rq, delta);
        }
 }
 #else /* !CONFIG_SCHED_HRTICK */