/*
- * linux/arch/x86-64/kernel/process.c
- *
* Copyright (C) 1995 Linus Torvalds
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
- *
+ *
* X86-64 port
* Andi Kleen.
*
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/fs.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
+#include <linux/utsname.h>
#include <linux/delay.h>
+#include <linux/module.h>
#include <linux/ptrace.h>
-#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <linux/tick.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
{
atomic_notifier_chain_register(&idle_notifier, n);
}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL(idle_notifier_unregister);
void enter_idle(void)
{
smp_mb();
local_irq_disable();
if (!need_resched()) {
- /* Enables interrupts one instruction before HLT.
- x86 special cases this so there is no race. */
- safe_halt();
- } else
- local_irq_enable();
+ ktime_t t0, t1;
+ u64 t0n, t1n;
+
+ t0 = ktime_get();
+ t0n = ktime_to_ns(t0);
+ safe_halt(); /* enables interrupts racelessly */
+ local_irq_disable();
+ t1 = ktime_get();
+ t1n = ktime_to_ns(t1);
+ sched_clock_idle_wakeup_event(t1n - t0n);
+ }
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
* to poll the ->need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+static void poll_idle(void)
{
local_irq_enable();
cpu_relax();
}
-void cpu_idle_wait(void)
-{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) &&
- !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
-void cpu_idle (void)
+void cpu_idle(void)
{
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
+ tick_nohz_stop_sched_tick();
+
rmb();
idle = pm_idle;
if (!idle)
__exit_idle();
}
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
}
}
+static void do_nothing(void *unused)
+{
+}
+
+void cpu_idle_wait(void)
+{
+ unsigned int cpu, this_cpu = get_cpu();
+ cpumask_t map, tmp = current->cpus_allowed;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ wmb();
+ do {
+ ssleep(1);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
+ /*
+ * We waited 1 sec, if a CPU still did not call idle
+ * it may be because it is in idle and not waking up
+ * because it has nothing to do.
+ * Give all the remaining CPUS a kick.
+ */
+ smp_call_function_mask(map, do_nothing, 0, 0);
+ } while (!cpus_empty(map));
+
+ set_cpus_allowed(current, tmp);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
__monitor((void *)¤t_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
- __mwait(eax, ecx);
+ __mwait(ax, cx);
}
}
}
}
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
{
if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
}
early_param("idle", idle_setup);
-/* Prints also some state that isn't saved in the pt_regs */
+/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned int fsindex,gsindex;
- unsigned int ds,cs,es;
+ unsigned int fsindex, gsindex;
+ unsigned int ds, cs, es;
printk("\n");
print_modules();
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
- printk_address(regs->rip);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
- regs->eflags);
+ printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk_address(regs->ip);
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
+ regs->flags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
- regs->rax, regs->rbx, regs->rcx);
+ regs->ax, regs->bx, regs->cx);
printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
- regs->rdx, regs->rsi, regs->rdi);
+ regs->dx, regs->si, regs->di);
printk("RBP: %016lx R08: %016lx R09: %016lx\n",
- regs->rbp, regs->r8, regs->r9);
+ regs->bp, regs->r8, regs->r9);
printk("R10: %016lx R11: %016lx R12: %016lx\n",
regs->r10, regs->r11, regs->r12);
printk("R13: %016lx R14: %016lx R15: %016lx\n",
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
- if (me->thread.io_bitmap_ptr) {
+ if (me->thread.io_bitmap_ptr) {
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
kfree(t->io_bitmap_ptr);
tsk->thread.debugreg3 = 0;
tsk->thread.debugreg6 = 0;
tsk->thread.debugreg7 = 0;
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
*/
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
- struct user_desc ud = {
+ struct user_desc ud = {
.base_addr = addr,
.limit = 0xfffff,
.seg_32bit = 1,
};
struct n_desc_struct *desc = (void *)t->thread.tls_array;
desc += tls;
- desc->a = LDT_entry_a(&ud);
- desc->b = LDT_entry_b(&ud);
+ desc->a = LDT_entry_a(&ud);
+ desc->b = LDT_entry_b(&ud);
}
static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
- struct desc_struct *desc = (void *)t->thread.tls_array;
- desc += tls;
- return desc->base0 |
- (((u32)desc->base1) << 16) |
- (((u32)desc->base2) << 24);
+ return get_desc_base(&t->thread.tls_array[tls]);
}
/*
unlazy_fpu(tsk);
}
-int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
struct task_struct * p, struct pt_regs * regs)
{
(THREAD_SIZE + task_stack_page(p))) - 1;
*childregs = *regs;
- childregs->rax = 0;
- childregs->rsp = rsp;
- if (rsp == ~0UL)
- childregs->rsp = (unsigned long)childregs;
+ childregs->ax = 0;
+ childregs->sp = sp;
+ if (sp == ~0UL)
+ childregs->sp = (unsigned long)childregs;
- p->thread.rsp = (unsigned long) childregs;
- p->thread.rsp0 = (unsigned long) (childregs+1);
- p->thread.userrsp = me->thread.userrsp;
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
+ p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
IO_BITMAP_BYTES);
set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
+ }
/*
* Set a new TLS for the child thread?
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32))
- err = ia32_child_tls(p, childregs);
+ err = do_set_thread_area(p, -1,
+ (struct user_desc __user *)childregs->si, 0);
else
#endif
err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
/*
* This special macro can be used to load a debugging register
*/
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+ struct pt_regs *pp, ptregs;
+
+ pp = task_pt_regs(tsk);
+
+ ptregs = *pp;
+ ptregs.cs &= 0xffff;
+ ptregs.ss &= 0xffff;
+
+ elf_core_copy_regs(regs, &ptregs);
+
+ return 1;
+}
static inline void __switch_to_xtra(struct task_struct *prev_p,
- struct task_struct *next_p,
- struct tss_struct *tss)
+ struct task_struct *next_p,
+ struct tss_struct *tss)
{
struct thread_struct *prev, *next;
prev = &prev_p->thread,
next = &next_p->thread;
+ if (next->debugctlmsr != prev->debugctlmsr)
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
+
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
loaddebug(next, 0);
loaddebug(next, 1);
/*
* switch_to(x,y) should switch tasks from x to y.
*
- * This could still be optimized:
+ * This could still be optimized:
* - fold all the options into a flag word and test it with a single test.
* - could test fs/gs bitsliced
*
* Kprobes not supported here. Set the probe on schedule instead.
*/
-__kprobes struct task_struct *
+struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
- int cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
/* we're going to use this soon, after a few expensive things */
/*
* Reload esp0, LDT and the page table pointer:
*/
- tss->rsp0 = next->rsp0;
+ tss->sp0 = next->sp0;
/*
* Switch DS and ES.
/*
* Switch the PDA and FPU contexts.
*/
- prev->userrsp = read_pda(oldrsp);
- write_pda(oldrsp, next->userrsp);
+ prev->usersp = read_pda(oldrsp);
+ write_pda(oldrsp, next->usersp);
write_pda(pcurrent, next_p);
write_pda(kernelstack,
/*
* sys_execve() executes a new program.
*/
-asmlinkage
+asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
char __user * __user *envp, struct pt_regs regs)
{
if (IS_ERR(filename))
return error;
error = do_execve(filename, argv, envp, ®s);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
putname(filename);
return error;
}
/* inherit personality from parent */
/* Make sure to be in 64bit mode */
- clear_thread_flag(TIF_IA32);
+ clear_thread_flag(TIF_IA32);
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
so it's not too bad. The main problem is just that
- 32bit childs are affected again. */
+ 32bit childs are affected again. */
current->personality &= ~READ_IMPLIES_EXEC;
}
asmlinkage long sys_fork(struct pt_regs *regs)
{
- return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
+ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
asmlinkage long
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
- newsp = regs->rsp;
+ newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
*/
asmlinkage long sys_vfork(struct pt_regs *regs)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
NULL, NULL);
}
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp,rip;
+ u64 fp,ip;
int count = 0;
if (!p || p == current || p->state==TASK_RUNNING)
return 0;
stack = (unsigned long)task_stack_page(p);
- if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
+ if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
- fp = *(u64 *)(p->thread.rsp);
+ fp = *(u64 *)(p->thread.sp);
do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
return 0;
- rip = *(u64 *)(fp+8);
- if (!in_sched_functions(rip))
- return rip;
+ ip = *(u64 *)(fp+8);
+ if (!in_sched_functions(ip))
+ return ip;
fp = *(u64 *)fp;
} while (count++ < 16);
return 0;
/* Not strictly needed for fs, but do it for symmetry
with gs */
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
+ if (addr <= 0xffffffff) {
set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
+ if (doit) {
+ load_TLS(&task->thread, cpu);
asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
}
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
- } else {
+ } else {
task->thread.fsindex = 0;
task->thread.fs = addr;
if (doit) {
}
put_cpu();
break;
- case ARCH_GET_FS: {
- unsigned long base;
+ case ARCH_GET_FS: {
+ unsigned long base;
if (task->thread.fsindex == FS_TLS_SEL)
base = read_32bit_tls(task, FS_TLS);
else if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fs;
- ret = put_user(base, (unsigned long __user *)addr);
- break;
+ ret = put_user(base, (unsigned long __user *)addr);
+ break;
}
- case ARCH_GET_GS: {
+ case ARCH_GET_GS: {
unsigned long base;
unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
else if (doit) {
- asm("movl %%gs,%0" : "=r" (gsindex));
+ asm("movl %%gs,%0" : "=r" (gsindex));
if (gsindex)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
}
else
base = task->thread.gs;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)addr);
break;
}
default:
ret = -EINVAL;
break;
- }
+ }
- return ret;
-}
+ return ret;
+}
long sys_arch_prctl(int code, unsigned long addr)
{
return do_arch_prctl(current, code, addr);
-}
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs *pp, ptregs;
-
- pp = task_pt_regs(tsk);
-
- ptregs = *pp;
- ptregs.cs &= 0xffff;
- ptregs.ss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- return 1;
}
unsigned long arch_align_stack(unsigned long sp)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+