* Distribute under GPLv2.
*
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ * Remote softirq infrastructure is by Jens Axboe.
*/
#include <linux/module.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
+#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq.h>
+
#include <asm/irq.h>
/*
- No shared variables, all the data are CPU local.
EXPORT_SYMBOL(irq_stat);
#endif
-static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
+static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+char *softirq_to_name[NR_SOFTIRQS] = {
+ "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+ "TASKLET", "SCHED", "HRTIMER", "RCU"
+};
+
/*
* we cannot loop indefinitely here to avoid userspace starvation,
* but we also don't want to introduce a worst case 1/HZ latency
* to the pending events, so lets the scheduler to balance
* the softirq load for us.
*/
-static inline void wakeup_softirqd(void)
+void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
WARN_ON_ONCE(in_irq());
raw_local_irq_save(flags);
- add_preempt_count(SOFTIRQ_OFFSET);
+ /*
+ * The preempt tracer hooks into add_preempt_count and will break
+ * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
+ * is set and before current->softirq_enabled is cleared.
+ * We must manually increment preempt_count here and manually
+ * call the trace_preempt_off later.
+ */
+ preempt_count() += SOFTIRQ_OFFSET;
/*
* Were softirqs turned off above:
*/
if (softirq_count() == SOFTIRQ_OFFSET)
trace_softirqs_off(ip);
raw_local_irq_restore(flags);
+
+ if (preempt_count() == SOFTIRQ_OFFSET)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
EXPORT_SYMBOL(local_bh_disable);
-void __local_bh_enable(void)
-{
- WARN_ON_ONCE(in_irq());
-
- /*
- * softirqs should never be enabled by __local_bh_enable(),
- * it always nests inside local_bh_enable() sections:
- */
- WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
-
- sub_preempt_count(SOFTIRQ_OFFSET);
-}
-EXPORT_SYMBOL_GPL(__local_bh_enable);
-
/*
* Special-case - softirqs can safely be enabled in
* cond_resched_softirq(), or by __do_softirq(),
EXPORT_SYMBOL(_local_bh_enable);
-void local_bh_enable(void)
+static inline void _local_bh_enable_ip(unsigned long ip)
{
+ WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
- unsigned long flags;
-
- WARN_ON_ONCE(in_irq());
-#endif
- WARN_ON_ONCE(irqs_disabled());
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- local_irq_save(flags);
+ local_irq_disable();
#endif
/*
* Are softirqs going to be turned on now:
*/
if (softirq_count() == SOFTIRQ_OFFSET)
- trace_softirqs_on((unsigned long)__builtin_return_address(0));
+ trace_softirqs_on(ip);
/*
* Keep preemption disabled until we are done with
* softirq processing:
dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
- local_irq_restore(flags);
+ local_irq_enable();
#endif
preempt_check_resched();
}
+
+void local_bh_enable(void)
+{
+ _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
EXPORT_SYMBOL(local_bh_enable);
void local_bh_enable_ip(unsigned long ip)
{
-#ifdef CONFIG_TRACE_IRQFLAGS
- unsigned long flags;
-
- WARN_ON_ONCE(in_irq());
-
- local_irq_save(flags);
-#endif
- /*
- * Are softirqs going to be turned on now:
- */
- if (softirq_count() == SOFTIRQ_OFFSET)
- trace_softirqs_on(ip);
- /*
- * Keep preemption disabled until we are done with
- * softirq processing:
- */
- sub_preempt_count(SOFTIRQ_OFFSET - 1);
-
- if (unlikely(!in_interrupt() && local_softirq_pending()))
- do_softirq();
-
- dec_preempt_count();
-#ifdef CONFIG_TRACE_IRQFLAGS
- local_irq_restore(flags);
-#endif
- preempt_check_resched();
+ _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
account_system_vtime(current);
__local_bh_disable((unsigned long)__builtin_return_address(0));
- trace_softirq_enter();
+ lockdep_softirq_enter();
cpu = smp_processor_id();
restart:
do {
if (pending & 1) {
+ int prev_count = preempt_count();
+ kstat_incr_softirqs_this_cpu(h - softirq_vec);
+
+ trace_softirq_entry(h, softirq_vec);
h->action(h);
- rcu_bh_qsctr_inc(cpu);
+ trace_softirq_exit(h, softirq_vec);
+ if (unlikely(prev_count != preempt_count())) {
+ printk(KERN_ERR "huh, entered softirq %td %s %p"
+ "with preempt_count %08x,"
+ " exited with %08x?\n", h - softirq_vec,
+ softirq_to_name[h - softirq_vec],
+ h->action, prev_count, preempt_count());
+ preempt_count() = prev_count;
+ }
+
+ rcu_bh_qs(cpu);
}
h++;
pending >>= 1;
if (pending)
wakeup_softirqd();
- trace_softirq_exit();
+ lockdep_softirq_exit();
account_system_vtime(current);
_local_bh_enable();
*/
void irq_enter(void)
{
-#ifdef CONFIG_NO_HZ
int cpu = smp_processor_id();
- if (idle_cpu(cpu) && !in_interrupt())
- tick_nohz_stop_idle(cpu);
-#endif
- __irq_enter();
-#ifdef CONFIG_NO_HZ
- if (idle_cpu(cpu))
- tick_nohz_update_jiffies();
-#endif
+
+ rcu_irq_enter();
+ if (idle_cpu(cpu) && !in_interrupt()) {
+ __irq_enter();
+ tick_check_idle(cpu);
+ } else
+ __irq_enter();
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
+ rcu_irq_exit();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
- if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
- tick_nohz_stop_sched_tick();
- rcu_irq_exit();
+ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
+ tick_nohz_stop_sched_tick(0);
#endif
preempt_enable_no_resched();
}
local_irq_restore(flags);
}
-void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+void open_softirq(int nr, void (*action)(struct softirq_action *))
{
- softirq_vec[nr].data = data;
softirq_vec[nr].action = action;
}
-/* Tasklets */
+/*
+ * Tasklets
+ */
struct tasklet_head
{
struct tasklet_struct *head;
struct tasklet_struct **tail;
};
-/* Some compilers disobey section attribute on statics when not
- initialized -- RR */
-static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
-static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
void __tasklet_schedule(struct tasklet_struct *t)
{
EXPORT_SYMBOL(__tasklet_hi_schedule);
+void __tasklet_hi_schedule_first(struct tasklet_struct *t)
+{
+ BUG_ON(!irqs_disabled());
+
+ t->next = __get_cpu_var(tasklet_hi_vec).head;
+ __get_cpu_var(tasklet_hi_vec).head = t;
+ __raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
+EXPORT_SYMBOL(__tasklet_hi_schedule_first);
+
static void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;
printk("Attempt to kill tasklet from interrupt\n");
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
- do
+ do {
yield();
- while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
}
tasklet_unlock_wait(t);
clear_bit(TASKLET_STATE_SCHED, &t->state);
EXPORT_SYMBOL(tasklet_kill);
+/*
+ * tasklet_hrtimer
+ */
+
+/*
+ * The trampoline is called when the hrtimer expires. It schedules a tasklet
+ * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
+ * hrtimer callback, but from softirq context.
+ */
+static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
+{
+ struct tasklet_hrtimer *ttimer =
+ container_of(timer, struct tasklet_hrtimer, timer);
+
+ tasklet_hi_schedule(&ttimer->tasklet);
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * Helper function which calls the hrtimer callback from
+ * tasklet/softirq context
+ */
+static void __tasklet_hrtimer_trampoline(unsigned long data)
+{
+ struct tasklet_hrtimer *ttimer = (void *)data;
+ enum hrtimer_restart restart;
+
+ restart = ttimer->function(&ttimer->timer);
+ if (restart != HRTIMER_NORESTART)
+ hrtimer_restart(&ttimer->timer);
+}
+
+/**
+ * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
+ * @ttimer: tasklet_hrtimer which is initialized
+ * @function: hrtimer callback funtion which gets called from softirq context
+ * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
+ * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
+ */
+void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t which_clock, enum hrtimer_mode mode)
+{
+ hrtimer_init(&ttimer->timer, which_clock, mode);
+ ttimer->timer.function = __hrtimer_tasklet_trampoline;
+ tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
+ (unsigned long)ttimer);
+ ttimer->function = function;
+}
+EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
+
+/*
+ * Remote softirq bits
+ */
+
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
+{
+ struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+ list_add_tail(&cp->list, head);
+
+ /* Trigger the softirq only if the list was previously empty. */
+ if (head->next == &cp->list)
+ raise_softirq_irqoff(softirq);
+}
+
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
+{
+ struct call_single_data *cp = data;
+ unsigned long flags;
+ int softirq;
+
+ softirq = cp->priv;
+
+ local_irq_save(flags);
+ __local_trigger(cp, softirq);
+ local_irq_restore(flags);
+}
+
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ if (cpu_online(cpu)) {
+ cp->func = remote_softirq_receive;
+ cp->info = cp;
+ cp->flags = 0;
+ cp->priv = softirq;
+
+ __smp_call_function_single(cpu, cp, 0);
+ return 0;
+ }
+ return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu. If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+ if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+ __local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq except that disabling interrupts and
+ * computing the current cpu is done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ unsigned long flags;
+ int this_cpu;
+
+ local_irq_save(flags);
+ this_cpu = smp_processor_id();
+ __send_remote_softirq(cp, cpu, this_cpu, softirq);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ /*
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq
+ */
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+ int cpu = (unsigned long) hcpu;
+ int i;
+
+ local_irq_disable();
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+ struct list_head *local_head;
+
+ if (list_empty(head))
+ continue;
+
+ local_head = &__get_cpu_var(softirq_work_list[i]);
+ list_splice_init(head, local_head);
+ raise_softirq_irqoff(i);
+ }
+ local_irq_enable();
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+ .notifier_call = remote_softirq_cpu_notify,
+};
+
void __init softirq_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
+ int i;
+
per_cpu(tasklet_vec, cpu).tail =
&per_cpu(tasklet_vec, cpu).head;
per_cpu(tasklet_hi_vec, cpu).tail =
&per_cpu(tasklet_hi_vec, cpu).head;
+ for (i = 0; i < NR_SOFTIRQS; i++)
+ INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
}
- open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
- open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+ register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
-static int ksoftirqd(void * __bind_cpu)
+static int run_ksoftirqd(void * __bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
preempt_enable_no_resched();
cond_resched();
preempt_disable();
+ rcu_sched_qs((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+ p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
if (IS_ERR(p)) {
printk("ksoftirqd for %i failed\n", hotcpu);
return NOTIFY_BAD;
break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(ksoftirqd, hotcpu),
- any_online_cpu(cpu_online_map));
+ cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
p = per_cpu(ksoftirqd, hotcpu);
per_cpu(ksoftirqd, hotcpu) = NULL;
- sched_setscheduler(p, SCHED_FIFO, ¶m);
+ sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m);
kthread_stop(p);
takeover_tasklets(hotcpu);
break;
.notifier_call = cpu_callback
};
-__init int spawn_ksoftirqd(void)
+static __init int spawn_ksoftirqd(void)
{
void *cpu = (void *)(long)smp_processor_id();
int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
register_cpu_notifier(&cpu_nfb);
return 0;
}
+early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
* Call a function on all processors
*/
-int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
int ret = 0;
preempt_disable();
- ret = smp_call_function(func, info, retry, wait);
+ ret = smp_call_function(func, info, wait);
local_irq_disable();
func(info);
local_irq_enable();
}
EXPORT_SYMBOL(on_each_cpu);
#endif
+
+/*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ * GCC does not inline them incorrectly. ]
+ */
+
+int __init __weak early_irq_init(void)
+{
+ return 0;
+}
+
+int __init __weak arch_probe_nr_irqs(void)
+{
+ return 0;
+}
+
+int __init __weak arch_early_irq_init(void)
+{
+ return 0;
+}
+
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
+{
+ return 0;
+}