include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit...
[safe/jmp/linux-2.6] / arch / mips / kernel / process.c
index e4f2f80..463b71b 100644 (file)
@@ -4,29 +4,32 @@
  * for more details.
  *
  * Copyright (C) 1994 - 1999, 2000 by Ralf Baechle and others.
+ * Copyright (C) 2005, 2006 by Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2004 Thiemo Seufer
  */
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/tick.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
 #include <linux/ptrace.h>
-#include <linux/slab.h>
 #include <linux/mman.h>
 #include <linux/personality.h>
 #include <linux/sys.h>
 #include <linux/user.h>
-#include <linux/a.out.h>
 #include <linux/init.h>
 #include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/random.h>
 
+#include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
+#include <asm/dsp.h>
 #include <asm/fpu.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/elf.h>
 #include <asm/isadep.h>
 #include <asm/inst.h>
-
-/*
- * We use this if we don't have any better idle routine..
- * (This to kill: kernel/platform.c.
- */
-void default_idle (void)
-{
-}
+#include <asm/stacktrace.h>
 
 /*
  * The idle thread. There's no useful work to be done, so just try to conserve
  * power and have a low exit latency (ie sit in a loop waiting for somebody to
  * say that they'd like to reschedule)
  */
-ATTRIB_NORET void cpu_idle(void)
+void __noreturn cpu_idle(void)
 {
+       int cpu;
+
+       /* CPU is going idle. */
+       cpu = smp_processor_id();
+
        /* endless idle loop with no priority at all */
        while (1) {
-               while (!need_resched())
+               tick_nohz_stop_sched_tick(1);
+               while (!need_resched() && cpu_online(cpu)) {
+#ifdef CONFIG_MIPS_MT_SMTC
+                       extern void smtc_idle_loop_hook(void);
+
+                       smtc_idle_loop_hook();
+#endif
                        if (cpu_wait)
                                (*cpu_wait)();
+               }
+#ifdef CONFIG_HOTPLUG_CPU
+               if (!cpu_online(cpu) && !cpu_isset(cpu, cpu_callin_map) &&
+                   (system_state == SYSTEM_RUNNING ||
+                    system_state == SYSTEM_BOOTING))
+                       play_dead();
+#endif
+               tick_nohz_restart_sched_tick();
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
        }
 }
 
@@ -69,15 +86,16 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
        unsigned long status;
 
        /* New thread loses kernel privileges. */
-       status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|KU_MASK);
+       status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
 #ifdef CONFIG_64BIT
-       status &= ~ST0_FR;
-       status |= (current->thread.mflags & MF_32BIT_REGS) ? 0 : ST0_FR;
+       status |= test_thread_flag(TIF_32BIT_REGS) ? 0 : ST0_FR;
 #endif
        status |= KU_USER;
        regs->cp0_status = status;
        clear_used_math();
-       lose_fpu();
+       clear_fpu_owner();
+       if (cpu_has_dsp)
+               __init_dsp();
        regs->cp0_epc = pc;
        regs->regs[29] = sp;
        current_thread_info()->addr_limit = USER_DS;
@@ -91,35 +109,33 @@ void flush_thread(void)
 {
 }
 
-int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+int copy_thread(unsigned long clone_flags, unsigned long usp,
        unsigned long unused, struct task_struct *p, struct pt_regs *regs)
 {
-       struct thread_info *ti = p->thread_info;
+       struct thread_info *ti = task_thread_info(p);
        struct pt_regs *childregs;
-       long childksp;
+       unsigned long childksp;
+       p->set_child_tid = p->clear_child_tid = NULL;
 
-       childksp = (unsigned long)ti + THREAD_SIZE - 32;
+       childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
 
        preempt_disable();
 
-       if (is_fpu_owner()) {
+       if (is_fpu_owner())
                save_fp(p);
-       }
+
+       if (cpu_has_dsp)
+               save_dsp(p);
 
        preempt_enable();
 
        /* set up new TSS. */
        childregs = (struct pt_regs *) childksp - 1;
+       /*  Put the stack after the struct pt_regs.  */
+       childksp = (unsigned long) childregs;
        *childregs = *regs;
        childregs->regs[7] = 0; /* Clear error flag */
 
-#if defined(CONFIG_BINFMT_IRIX)
-       if (current->personality != PER_LINUX) {
-               /* Under IRIX things are a little different. */
-               childregs->regs[3] = 1;
-               regs->regs[3] = 0;
-       }
-#endif
        childregs->regs[2] = 0; /* Child gets zero as return value */
        regs->regs[2] = p->pid;
 
@@ -140,8 +156,23 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
         */
        p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
        childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
+
+#ifdef CONFIG_MIPS_MT_SMTC
+       /*
+        * SMTC restores TCStatus after Status, and the CU bits
+        * are aliased there.
+        */
+       childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
+#endif
        clear_tsk_thread_flag(p, TIF_USEDFPU);
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+       clear_tsk_thread_flag(p, TIF_FPUBOUND);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+       if (clone_flags & CLONE_SETTLS)
+               ti->tp_value = regs->regs[7];
+
        return 0;
 }
 
@@ -153,7 +184,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
        return 1;
 }
 
-void dump_regs(elf_greg_t *gp, struct pt_regs *regs)
+void elf_dump_regs(elf_greg_t *gp, struct pt_regs *regs)
 {
        int i;
 
@@ -175,7 +206,13 @@ void dump_regs(elf_greg_t *gp, struct pt_regs *regs)
 #endif
 }
 
-int dump_task_fpu (struct task_struct *t, elf_fpregset_t *fpr)
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+       elf_dump_regs(*regs, task_pt_regs(tsk));
+       return 1;
+}
+
+int dump_task_fpu(struct task_struct *t, elf_fpregset_t *fpr)
 {
        memcpy(fpr, &t->thread.fpu, sizeof(current->thread.fpu));
 
@@ -185,7 +222,7 @@ int dump_task_fpu (struct task_struct *t, elf_fpregset_t *fpr)
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *))
 {
        do_exit(fn(arg));
 }
@@ -201,8 +238,8 @@ long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
        regs.cp0_epc = (unsigned long) kernel_thread_helper;
        regs.cp0_status = read_c0_status();
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
-       regs.cp0_status &= ~(ST0_KUP | ST0_IEC);
-       regs.cp0_status |= ST0_IEP;
+       regs.cp0_status = (regs.cp0_status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
+                         ((regs.cp0_status & (ST0_KUC | ST0_IEC)) << 2);
 #else
        regs.cp0_status |= ST0_EXL;
 #endif
@@ -211,73 +248,104 @@ long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
 }
 
+/*
+ *
+ */
 struct mips_frame_info {
-       int frame_offset;
-       int pc_offset;
+       void            *func;
+       unsigned long   func_size;
+       int             frame_size;
+       int             pc_offset;
 };
-static struct mips_frame_info schedule_frame;
-static struct mips_frame_info schedule_timeout_frame;
-static struct mips_frame_info sleep_on_frame;
-static struct mips_frame_info sleep_on_timeout_frame;
-static struct mips_frame_info wait_for_completion_frame;
-static int mips_frame_info_initialized;
-static int __init get_frame_info(struct mips_frame_info *info, void *func)
+
+static inline int is_ra_save_ins(union mips_instruction *ip)
 {
-       int i;
-       union mips_instruction *ip = (union mips_instruction *)func;
+       /* sw / sd $ra, offset($sp) */
+       return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) &&
+               ip->i_format.rs == 29 &&
+               ip->i_format.rt == 31;
+}
+
+static inline int is_jal_jalr_jr_ins(union mips_instruction *ip)
+{
+       if (ip->j_format.opcode == jal_op)
+               return 1;
+       if (ip->r_format.opcode != spec_op)
+               return 0;
+       return ip->r_format.func == jalr_op || ip->r_format.func == jr_op;
+}
+
+static inline int is_sp_move_ins(union mips_instruction *ip)
+{
+       /* addiu/daddiu sp,sp,-imm */
+       if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
+               return 0;
+       if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
+               return 1;
+       return 0;
+}
+
+static int get_frame_info(struct mips_frame_info *info)
+{
+       union mips_instruction *ip = info->func;
+       unsigned max_insns = info->func_size / sizeof(union mips_instruction);
+       unsigned i;
+
        info->pc_offset = -1;
-       info->frame_offset = -1;
-       for (i = 0; i < 128; i++, ip++) {
-               /* if jal, jalr, jr, stop. */
-               if (ip->j_format.opcode == jal_op ||
-                   (ip->r_format.opcode == spec_op &&
-                    (ip->r_format.func == jalr_op ||
-                     ip->r_format.func == jr_op)))
-                       break;
+       info->frame_size = 0;
 
-               if (
-#ifdef CONFIG_32BIT
-                   ip->i_format.opcode == sw_op &&
-#endif
-#ifdef CONFIG_64BIT
-                   ip->i_format.opcode == sd_op &&
-#endif
-                   ip->i_format.rs == 29)
-               {
-                       /* sw / sd $ra, offset($sp) */
-                       if (ip->i_format.rt == 31) {
-                               if (info->pc_offset != -1)
-                                       break;
-                               info->pc_offset =
-                                       ip->i_format.simmediate / sizeof(long);
-                       }
-                       /* sw / sd $s8, offset($sp) */
-                       if (ip->i_format.rt == 30) {
-                               if (info->frame_offset != -1)
-                                       break;
-                               info->frame_offset =
-                                       ip->i_format.simmediate / sizeof(long);
-                       }
+       if (!ip)
+               goto err;
+
+       if (max_insns == 0)
+               max_insns = 128U;       /* unknown function size */
+       max_insns = min(128U, max_insns);
+
+       for (i = 0; i < max_insns; i++, ip++) {
+
+               if (is_jal_jalr_jr_ins(ip))
+                       break;
+               if (!info->frame_size) {
+                       if (is_sp_move_ins(ip))
+                               info->frame_size = - ip->i_format.simmediate;
+                       continue;
+               }
+               if (info->pc_offset == -1 && is_ra_save_ins(ip)) {
+                       info->pc_offset =
+                               ip->i_format.simmediate / sizeof(long);
+                       break;
                }
        }
-       if (info->pc_offset == -1 || info->frame_offset == -1) {
-               printk("Can't analyze prologue code at %p\n", func);
-               info->pc_offset = -1;
-               info->frame_offset = -1;
-               return -1;
-       }
-
-       return 0;
+       if (info->frame_size && info->pc_offset >= 0) /* nested */
+               return 0;
+       if (info->pc_offset < 0) /* leaf */
+               return 1;
+       /* prologue seems boggus... */
+err:
+       return -1;
 }
 
+static struct mips_frame_info schedule_mfi __read_mostly;
+
 static int __init frame_info_init(void)
 {
-       mips_frame_info_initialized =
-               !get_frame_info(&schedule_frame, schedule) &&
-               !get_frame_info(&schedule_timeout_frame, schedule_timeout) &&
-               !get_frame_info(&sleep_on_frame, sleep_on) &&
-               !get_frame_info(&sleep_on_timeout_frame, sleep_on_timeout) &&
-               !get_frame_info(&wait_for_completion_frame, wait_for_completion);
+       unsigned long size = 0;
+#ifdef CONFIG_KALLSYMS
+       unsigned long ofs;
+
+       kallsyms_lookup_size_offset((unsigned long)schedule, &size, &ofs);
+#endif
+       schedule_mfi.func = schedule;
+       schedule_mfi.func_size = size;
+
+       get_frame_info(&schedule_mfi);
+
+       /*
+        * Without schedule() frame info, result given by
+        * thread_saved_pc() and get_wchan() are not reliable.
+        */
+       if (schedule_mfi.pc_offset < 0)
+               printk("Can't analyze schedule() prologue at %p\n", schedule);
 
        return 0;
 }
@@ -294,71 +362,122 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
        /* New born processes are a special case */
        if (t->reg31 == (unsigned long) ret_from_fork)
                return t->reg31;
-
-       if (schedule_frame.pc_offset < 0)
+       if (schedule_mfi.pc_offset < 0)
                return 0;
-       return ((unsigned long *)t->reg29)[schedule_frame.pc_offset];
+       return ((unsigned long *)t->reg29)[schedule_mfi.pc_offset];
 }
 
-/* get_wchan - a maintenance nightmare^W^Wpain in the ass ...  */
-unsigned long get_wchan(struct task_struct *p)
+
+#ifdef CONFIG_KALLSYMS
+/* used by show_backtrace() */
+unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
+                          unsigned long pc, unsigned long *ra)
 {
-       unsigned long frame, pc;
+       unsigned long stack_page;
+       struct mips_frame_info info;
+       unsigned long size, ofs;
+       int leaf;
+       extern void ret_from_irq(void);
+       extern void ret_from_exception(void);
+
+       stack_page = (unsigned long)task_stack_page(task);
+       if (!stack_page)
+               return 0;
 
-       if (!p || p == current || p->state == TASK_RUNNING)
+       /*
+        * If we reached the bottom of interrupt context,
+        * return saved pc in pt_regs.
+        */
+       if (pc == (unsigned long)ret_from_irq ||
+           pc == (unsigned long)ret_from_exception) {
+               struct pt_regs *regs;
+               if (*sp >= stack_page &&
+                   *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
+                       regs = (struct pt_regs *)*sp;
+                       pc = regs->cp0_epc;
+                       if (__kernel_text_address(pc)) {
+                               *sp = regs->regs[29];
+                               *ra = regs->regs[31];
+                               return pc;
+                       }
+               }
+               return 0;
+       }
+       if (!kallsyms_lookup_size_offset(pc, &size, &ofs))
                return 0;
+       /*
+        * Return ra if an exception occured at the first instruction
+        */
+       if (unlikely(ofs == 0)) {
+               pc = *ra;
+               *ra = 0;
+               return pc;
+       }
 
-       if (!mips_frame_info_initialized)
+       info.func = (void *)(pc - ofs);
+       info.func_size = ofs;   /* analyze from start to ofs */
+       leaf = get_frame_info(&info);
+       if (leaf < 0)
+               return 0;
+
+       if (*sp < stack_page ||
+           *sp + info.frame_size > stack_page + THREAD_SIZE - 32)
                return 0;
-       pc = thread_saved_pc(p);
-       if (!in_sched_functions(pc))
-               goto out;
 
-       if (pc >= (unsigned long) sleep_on_timeout)
-               goto schedule_timeout_caller;
-       if (pc >= (unsigned long) sleep_on)
-               goto schedule_caller;
-       if (pc >= (unsigned long) interruptible_sleep_on_timeout)
-               goto schedule_timeout_caller;
-       if (pc >= (unsigned long)interruptible_sleep_on)
-               goto schedule_caller;
-       if (pc >= (unsigned long)wait_for_completion)
-               goto schedule_caller;
-       goto schedule_timeout_caller;
-
-schedule_caller:
-       frame = ((unsigned long *)p->thread.reg30)[schedule_frame.frame_offset];
-       if (pc >= (unsigned long) sleep_on)
-               pc = ((unsigned long *)frame)[sleep_on_frame.pc_offset];
+       if (leaf)
+               /*
+                * For some extreme cases, get_frame_info() can
+                * consider wrongly a nested function as a leaf
+                * one. In that cases avoid to return always the
+                * same value.
+                */
+               pc = pc != *ra ? *ra : 0;
        else
-               pc = ((unsigned long *)frame)[wait_for_completion_frame.pc_offset];
-       goto out;
+               pc = ((unsigned long *)(*sp))[info.pc_offset];
 
-schedule_timeout_caller:
-       /*
-        * The schedule_timeout frame
-        */
-       frame = ((unsigned long *)p->thread.reg30)[schedule_frame.frame_offset];
+       *sp += info.frame_size;
+       *ra = 0;
+       return __kernel_text_address(pc) ? pc : 0;
+}
+#endif
 
-       /*
-        * frame now points to sleep_on_timeout's frame
-        */
-       pc    = ((unsigned long *)frame)[schedule_timeout_frame.pc_offset];
+/*
+ * get_wchan - a maintenance nightmare^W^Wpain in the ass ...
+ */
+unsigned long get_wchan(struct task_struct *task)
+{
+       unsigned long pc = 0;
+#ifdef CONFIG_KALLSYMS
+       unsigned long sp;
+       unsigned long ra = 0;
+#endif
 
-       if (in_sched_functions(pc)) {
-               /* schedule_timeout called by [interruptible_]sleep_on_timeout */
-               frame = ((unsigned long *)frame)[schedule_timeout_frame.frame_offset];
-               pc    = ((unsigned long *)frame)[sleep_on_timeout_frame.pc_offset];
-       }
+       if (!task || task == current || task->state == TASK_RUNNING)
+               goto out;
+       if (!task_stack_page(task))
+               goto out;
 
-out:
+       pc = thread_saved_pc(task);
 
-#ifdef CONFIG_64BIT
-       if (current->thread.mflags & MF_32BIT_REGS) /* Kludge for 32-bit ps  */
-               pc &= 0xffffffffUL;
+#ifdef CONFIG_KALLSYMS
+       sp = task->thread.reg29 + schedule_mfi.frame_size;
+
+       while (in_sched_functions(pc))
+               pc = unwind_stack(task, &sp, pc, &ra);
 #endif
 
+out:
        return pc;
 }
 
-EXPORT_SYMBOL(get_wchan);
+/*
+ * Don't forget that the stack pointer must be aligned on a 8 bytes
+ * boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
+ */
+unsigned long arch_align_stack(unsigned long sp)
+{
+       if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+               sp -= get_random_int() & ~PAGE_MASK;
+
+       return sp & ALMASK;
+}