x86: clean up process_32/64.c
[safe/jmp/linux-2.6] / arch / x86 / kernel / process_64.c
index ab79e1d..4c4d8b3 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  Pentium III FXSR, SSE support
  *     Gareth Hughes <gareth@valinux.com>, May 2000
- * 
+ *
  *  X86-64 port
  *     Andi Kleen.
  *
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/fs.h>
 #include <linux/elfcore.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/module.h>
 #include <linux/a.out.h>
 #include <linux/interrupt.h>
+#include <linux/utsname.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/utsname.h>
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
@@ -72,13 +72,6 @@ void idle_notifier_register(struct notifier_block *n)
 {
        atomic_notifier_chain_register(&idle_notifier, n);
 }
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-       atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
 
 void enter_idle(void)
 {
@@ -116,11 +109,18 @@ static void default_idle(void)
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
-               /* Enables interrupts one instruction before HLT.
-                  x86 special cases this so there is no race. */
-               safe_halt();
-       } else
-               local_irq_enable();
+               ktime_t t0, t1;
+               u64 t0n, t1n;
+
+               t0 = ktime_get();
+               t0n = ktime_to_ns(t0);
+               safe_halt();    /* enables interrupts racelessly */
+               local_irq_disable();
+               t1 = ktime_get();
+               t1n = ktime_to_ns(t1);
+               sched_clock_idle_wakeup_event(t1n - t0n);
+       }
+       local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
 }
 
@@ -129,54 +129,12 @@ static void default_idle(void)
  * to poll the ->need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+static void poll_idle(void)
 {
        local_irq_enable();
        cpu_relax();
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-void cpu_idle_wait(void)
-{
-       unsigned int cpu, this_cpu = get_cpu();
-       cpumask_t map, tmp = current->cpus_allowed;
-
-       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-       put_cpu();
-
-       cpus_clear(map);
-       for_each_online_cpu(cpu) {
-               per_cpu(cpu_idle_state, cpu) = 1;
-               cpu_set(cpu, map);
-       }
-
-       __get_cpu_var(cpu_idle_state) = 0;
-
-       wmb();
-       do {
-               ssleep(1);
-               for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) &&
-                                       !per_cpu(cpu_idle_state, cpu))
-                               cpu_clear(cpu, map);
-               }
-               cpus_and(map, map, cpu_online_map);
-               /*
-                * We waited 1 sec, if a CPU still did not call idle
-                * it may be because it is in idle and not waking up
-                * because it has nothing to do.
-                * Give all the remaining CPUS a kick.
-                */
-               smp_call_function_mask(map, do_nothing, 0, 0);
-       } while (!cpus_empty(map));
-
-       set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
@@ -207,7 +165,7 @@ static inline void play_dead(void)
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
-void cpu_idle (void)
+void cpu_idle(void)
 {
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
@@ -247,6 +205,47 @@ void cpu_idle (void)
        }
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+void cpu_idle_wait(void)
+{
+       unsigned int cpu, this_cpu = get_cpu();
+       cpumask_t map, tmp = current->cpus_allowed;
+
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
+
+       wmb();
+       do {
+               ssleep(1);
+               for_each_online_cpu(cpu) {
+                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
+               /*
+                * We waited 1 sec, if a CPU still did not call idle
+                * it may be because it is in idle and not waking up
+                * because it has nothing to do.
+                * Give all the remaining CPUS a kick.
+                */
+               smp_call_function_mask(map, do_nothing, 0, 0);
+       } while (!cpus_empty(map));
+
+       set_cpus_allowed(current, tmp);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
@@ -257,13 +256,13 @@ void cpu_idle (void)
  * New with Core Duo processors, MWAIT can take some hints based on CPU
  * capability.
  */
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
-                       __mwait(eax, ecx);
+                       __mwait(ax, cx);
        }
 }
 
@@ -300,7 +299,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
        }
 }
 
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
 {
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
@@ -315,13 +314,13 @@ static int __init idle_setup (char *str)
 }
 early_param("idle", idle_setup);
 
-/* Prints also some state that isn't saved in the pt_regs */ 
+/* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs * regs)
 {
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
-       unsigned int fsindex,gsindex;
-       unsigned int ds,cs,es; 
+       unsigned int fsindex, gsindex;
+       unsigned int ds, cs, es;
 
        printk("\n");
        print_modules();
@@ -330,16 +329,16 @@ void __show_regs(struct pt_regs * regs)
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
-       printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-       printk_address(regs->rip); 
-       printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
-               regs->eflags);
+       printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+       printk_address(regs->ip);
+       printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
+               regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
-              regs->rax, regs->rbx, regs->rcx);
+              regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
-              regs->rdx, regs->rsi, regs->rdi); 
+              regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
-              regs->rbp, regs->r8, regs->r9); 
+              regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12); 
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
@@ -390,7 +389,7 @@ void exit_thread(void)
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;
 
-       if (me->thread.io_bitmap_ptr) { 
+       if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 
                kfree(t->io_bitmap_ptr);
@@ -426,7 +425,7 @@ void flush_thread(void)
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
-       memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));        
+       memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
@@ -449,7 +448,7 @@ void release_thread(struct task_struct *dead_task)
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 {
-       struct user_desc ud = { 
+       struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
@@ -458,17 +457,13 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
-       desc->a = LDT_entry_a(&ud); 
-       desc->b = LDT_entry_b(&ud); 
+       desc->a = LDT_entry_a(&ud);
+       desc->b = LDT_entry_b(&ud);
 }
 
 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 {
-       struct desc_struct *desc = (void *)t->thread.tls_array;
-       desc += tls;
-       return desc->base0 | 
-               (((u32)desc->base1) << 16) | 
-               (((u32)desc->base2) << 24);
+       return get_desc_base(&t->thread.tls_array[tls]);
 }
 
 /*
@@ -480,7 +475,7 @@ void prepare_to_copy(struct task_struct *tsk)
        unlazy_fpu(tsk);
 }
 
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
        struct task_struct * p, struct pt_regs * regs)
 {
@@ -492,14 +487,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;
 
-       childregs->rax = 0;
-       childregs->rsp = rsp;
-       if (rsp == ~0UL)
-               childregs->rsp = (unsigned long)childregs;
+       childregs->ax = 0;
+       childregs->sp = sp;
+       if (sp == ~0UL)
+               childregs->sp = (unsigned long)childregs;
 
-       p->thread.rsp = (unsigned long) childregs;
-       p->thread.rsp0 = (unsigned long) (childregs+1);
-       p->thread.userrsp = me->thread.userrsp; 
+       p->thread.sp = (unsigned long) childregs;
+       p->thread.sp0 = (unsigned long) (childregs+1);
+       p->thread.usersp = me->thread.usersp;
 
        set_tsk_thread_flag(p, TIF_FORK);
 
@@ -520,7 +515,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
-       } 
+       }
 
        /*
         * Set a new TLS for the child thread?
@@ -528,7 +523,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
        if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
-                       err = ia32_child_tls(p, childregs); 
+                       err = do_set_thread_area(p, -1,
+                               (struct user_desc __user *)childregs->si, 0);
                else                    
 #endif  
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
@@ -547,17 +543,38 @@ out:
 /*
  * This special macro can be used to load a debugging register
  */
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+       struct pt_regs *pp, ptregs;
+
+       pp = task_pt_regs(tsk);
+
+       ptregs = *pp;
+       ptregs.cs &= 0xffff;
+       ptregs.ss &= 0xffff;
+
+       elf_core_copy_regs(regs, &ptregs);
+
+       return 1;
+}
 
 static inline void __switch_to_xtra(struct task_struct *prev_p,
-                                   struct task_struct *next_p,
-                                   struct tss_struct *tss)
+                                   struct task_struct *next_p,
+                                   struct tss_struct *tss)
 {
        struct thread_struct *prev, *next;
 
        prev = &prev_p->thread,
        next = &next_p->thread;
 
+       if (next->debugctlmsr != prev->debugctlmsr)
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
+
        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
@@ -586,7 +603,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 /*
  *     switch_to(x,y) should switch tasks from x to y.
  *
- * This could still be optimized: 
+ * This could still be optimized:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  *
@@ -597,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
-       int cpu = smp_processor_id();  
+       int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
        /* we're going to use this soon, after a few expensive things */
@@ -607,7 +624,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /*
         * Reload esp0, LDT and the page table pointer:
         */
-       tss->rsp0 = next->rsp0;
+       tss->sp0 = next->sp0;
 
        /* 
         * Switch DS and ES.
@@ -666,8 +683,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /* 
         * Switch the PDA and FPU contexts.
         */
-       prev->userrsp = read_pda(oldrsp); 
-       write_pda(oldrsp, next->userrsp); 
+       prev->usersp = read_pda(oldrsp);
+       write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p); 
 
        write_pda(kernelstack,
@@ -700,7 +717,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage 
+asmlinkage
 long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
 {
@@ -712,11 +729,6 @@ long sys_execve(char __user *name, char __user * __user *argv,
        if (IS_ERR(filename)) 
                return error;
        error = do_execve(filename, argv, envp, &regs); 
-       if (error == 0) {
-               task_lock(current);
-               current->ptrace &= ~PT_DTRACE;
-               task_unlock(current);
-       }
        putname(filename);
        return error;
 }
@@ -726,18 +738,18 @@ void set_personality_64bit(void)
        /* inherit personality from parent */
 
        /* Make sure to be in 64bit mode */
-       clear_thread_flag(TIF_IA32); 
+       clear_thread_flag(TIF_IA32);
 
        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
-          32bit childs are affected again. */
+          32bit childs are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
 }
 
 asmlinkage long sys_fork(struct pt_regs *regs)
 {
-       return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
+       return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
 }
 
 asmlinkage long
@@ -745,7 +757,7 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
        if (!newsp)
-               newsp = regs->rsp;
+               newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
@@ -761,29 +773,29 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
  */
 asmlinkage long sys_vfork(struct pt_regs *regs)
 {
-       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                    NULL, NULL);
 }
 
 unsigned long get_wchan(struct task_struct *p)
 {
        unsigned long stack;
-       u64 fp,rip;
+       u64 fp,ip;
        int count = 0;
 
        if (!p || p == current || p->state==TASK_RUNNING)
                return 0; 
        stack = (unsigned long)task_stack_page(p);
-       if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
+       if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
-       fp = *(u64 *)(p->thread.rsp);
+       fp = *(u64 *)(p->thread.sp);
        do { 
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0; 
-               rip = *(u64 *)(fp+8); 
-               if (!in_sched_functions(rip))
-                       return rip; 
+               ip = *(u64 *)(fp+8);
+               if (!in_sched_functions(ip))
+                       return ip;
                fp = *(u64 *)fp; 
        } while (count++ < 16); 
        return 0;
@@ -824,19 +836,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
-                       return -EPERM; 
+                       return -EPERM;
                cpu = get_cpu();
-               /* handle small bases via the GDT because that's faster to 
+               /* handle small bases via the GDT because that's faster to
                   switch. */
-               if (addr <= 0xffffffff) { 
+               if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
-                       if (doit) { 
-                               load_TLS(&task->thread, cpu); 
+                       if (doit) {
+                               load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
-               } else { 
+               } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
@@ -848,24 +860,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                }
                put_cpu();
                break;
-       case ARCH_GET_FS: { 
-               unsigned long base; 
+       case ARCH_GET_FS: {
+               unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
-               ret = put_user(base, (unsigned long __user *)addr); 
-               break; 
+               ret = put_user(base, (unsigned long __user *)addr);
+               break;
        }
-       case ARCH_GET_GS: { 
+       case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
-                       asm("movl %%gs,%0" : "=r" (gsindex));
+                       asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
@@ -873,39 +885,21 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                }
                else
                        base = task->thread.gs;
-               ret = put_user(base, (unsigned long __user *)addr); 
+               ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
 
        default:
                ret = -EINVAL;
                break;
-       } 
+       }
 
-       return ret;     
-} 
+       return ret;
+}
 
 long sys_arch_prctl(int code, unsigned long addr)
 {
        return do_arch_prctl(current, code, addr);
-} 
-
-/* 
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-       struct pt_regs *pp, ptregs;
-
-       pp = task_pt_regs(tsk);
-
-       ptregs = *pp; 
-       ptregs.cs &= 0xffff;
-       ptregs.ss &= 0xffff;
-
-       elf_core_copy_regs(regs, &ptregs);
-       return 1;
 }
 
 unsigned long arch_align_stack(unsigned long sp)
@@ -914,3 +908,10 @@ unsigned long arch_align_stack(unsigned long sp)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+       unsigned long range_end = mm->brk + 0x02000000;
+       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+