/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  CPU hotplug support - ashok.raj@intel.com
 *
 *  This file handles the architecture-dependent parts of process handling..
 */
17 #include <linux/stackprotector.h>
18 #include <linux/cpu.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
22 #include <linux/kernel.h>
24 #include <linux/elfcore.h>
25 #include <linux/smp.h>
26 #include <linux/slab.h>
27 #include <linux/user.h>
28 #include <linux/interrupt.h>
29 #include <linux/utsname.h>
30 #include <linux/delay.h>
31 #include <linux/module.h>
32 #include <linux/ptrace.h>
33 #include <linux/notifier.h>
34 #include <linux/kprobes.h>
35 #include <linux/kdebug.h>
36 #include <linux/tick.h>
37 #include <linux/prctl.h>
38 #include <linux/uaccess.h>
40 #include <linux/ftrace.h>
41 #include <linux/dmi.h>
43 #include <asm/pgtable.h>
44 #include <asm/system.h>
45 #include <asm/processor.h>
47 #include <asm/mmu_context.h>
48 #include <asm/prctl.h>
50 #include <asm/proto.h>
53 #include <asm/syscalls.h>
55 #include <asm/debugreg.h>
/* NOTE(review): this chunk is a garbled extraction — the original file's
 * line numbers are fused onto each line and many lines were dropped.
 * Code below is left byte-identical; restore from the pristine file
 * before compiling. */
/* Assembly return path for newly forked tasks. */
57 asmlinkage extern void ret_from_fork(void);
/* Per-CPU copy of the user stack pointer saved at kernel entry. */
59 DEFINE_PER_CPU(unsigned long, old_rsp);
/* Per-CPU flag: nonzero while this CPU sits in the idle loop. */
60 static DEFINE_PER_CPU(unsigned char, is_idle);
61 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
/* Notifier chain fired on idle entry/exit (see idle_notifier_register). */
64 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
66 void idle_notifier_register(struct notifier_block *n)
68 atomic_notifier_chain_register(&idle_notifier, n);
70 EXPORT_SYMBOL_GPL(idle_notifier_register);
72 void idle_notifier_unregister(struct notifier_block *n)
74 atomic_notifier_chain_unregister(&idle_notifier, n);
76 EXPORT_SYMBOL_GPL(idle_notifier_unregister);
/* NOTE(review): the enclosing function header (presumably
 * `void enter_idle(void)`) and its braces were lost in extraction.
 * The fragment marks this CPU idle and fires the IDLE_START chain. */
80 percpu_write(is_idle, 1);
81 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
84 static void __exit_idle(void)
86 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
88 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
91 /* Called from interrupts to signify idle end */
94 /* idle loop has pid 0 */
/* NOTE(review): this region (play_dead plus the cpu_idle loop) is heavily
 * truncated — function braces, the play_dead body, the mwait/hlt call and
 * several statements are missing.  Code left byte-identical; comments
 * describe the visible intent only. */
101 static inline void play_dead(void)
/* Main idle loop entered by each CPU when it has nothing to run. */
108 * The idle thread. There's no useful work to be
109 * done, so just try to conserve power and have a
110 * low exit latency (ie sit in a loop waiting for
111 * somebody to say that they'd like to reschedule)
/* TS_POLLING tells the scheduler it need not send an IPI to wake us. */
115 current_thread_info()->status |= TS_POLLING;
118 * If we're the non-boot CPU, nothing set the stack canary up
119 * for us. CPU0 already has it initialized but no harm in
120 * doing it again. This is a good place for updating it, as
121 * we wont ever return from this function (so the invalid
122 * canaries already on the stack wont ever trigger).
124 boot_init_stack_canary();
126 /* endless idle loop with no priority at all */
128 tick_nohz_stop_sched_tick(1);
129 while (!need_resched()) {
/* Hot-unplugged CPUs fall into play_dead() here — TODO confirm,
 * the call itself was lost in extraction. */
133 if (cpu_is_offline(smp_processor_id()))
136 * Idle routines should keep interrupts disabled
137 * from here on, until they go to idle.
138 * Otherwise, idle callbacks can misfire.
142 /* Don't trace irqs off for idle */
143 stop_critical_timings();
145 start_critical_timings();
146 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle
148 loops can be woken up without interrupt. */
152 tick_nohz_restart_sched_tick();
153 preempt_enable_no_resched();
159 /* Prints also some state that isn't saved in the pt_regs */
/* NOTE(review): truncated extraction — braces, the `const char *board`
 * declaration, the cr0..cr4 control-register reads and the
 * get_debugreg() calls for d0..d7 are missing.  Code left byte-identical. */
160 void __show_regs(struct pt_regs *regs, int all)
162 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
163 unsigned long d0, d1, d2, d3, d6, d7;
164 unsigned int fsindex, gsindex;
165 unsigned int ds, cs, es;
/* DMI product name is folded into the banner printk below. */
170 board = dmi_get_system_info(DMI_PRODUCT_NAME);
173 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
174 current->pid, current->comm, print_tainted(),
175 init_utsname()->release,
176 (int)strcspn(init_utsname()->version, " "),
177 init_utsname()->version, board);
178 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
179 printk_address(regs->ip, 1);
180 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
181 regs->sp, regs->flags);
182 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
183 regs->ax, regs->bx, regs->cx);
184 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
185 regs->dx, regs->si, regs->di);
186 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
187 regs->bp, regs->r8, regs->r9);
188 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
189 regs->r10, regs->r11, regs->r12);
190 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
191 regs->r13, regs->r14, regs->r15);
/* Read live segment selectors directly from the CPU registers. */
193 asm("movl %%ds,%0" : "=r" (ds));
194 asm("movl %%cs,%0" : "=r" (cs));
195 asm("movl %%es,%0" : "=r" (es));
196 asm("movl %%fs,%0" : "=r" (fsindex));
197 asm("movl %%gs,%0" : "=r" (gsindex));
/* On x86-64 the fs/gs bases live in MSRs, not in the descriptors. */
199 rdmsrl(MSR_FS_BASE, fs);
200 rdmsrl(MSR_GS_BASE, gs);
201 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
211 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
212 fs, fsindex, gs, gsindex, shadowgs);
213 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
215 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
/* d0..d7 presumably come from get_debugreg() calls lost in
 * extraction — confirm against the pristine file. */
221 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
225 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
228 void show_regs(struct pt_regs *regs)
230 show_registers(regs);
231 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
234 void release_thread(struct task_struct *dead_task)
237 if (dead_task->mm->context.size) {
238 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
240 dead_task->mm->context.ldt,
241 dead_task->mm->context.size);
247 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
249 struct user_desc ud = {
256 struct desc_struct *desc = t->thread.tls_array;
261 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
263 return get_desc_base(&t->thread.tls_array[tls]);
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.  Flush lazy FPU state so the child
 * inherits an up-to-date copy.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
/* NOTE(review): truncated extraction — braces, the `int err` declaration,
 * the *childregs = *regs copy, several error/return paths and the closing
 * `return err;` are missing.  Code left byte-identical; restore from the
 * pristine file before compiling. */
275 int copy_thread(unsigned long clone_flags, unsigned long sp,
276 unsigned long unused,
277 struct task_struct *p, struct pt_regs *regs)
280 struct pt_regs *childregs;
281 struct task_struct *me = current;
/* Child's pt_regs live at the top of its kernel stack. */
283 childregs = ((struct pt_regs *)
284 (THREAD_SIZE + task_stack_page(p))) - 1;
290 childregs->sp = (unsigned long)childregs;
292 p->thread.sp = (unsigned long) childregs;
293 p->thread.sp0 = (unsigned long) (childregs+1);
294 p->thread.usersp = me->thread.usersp;
/* TIF_FORK routes the child through ret_from_fork on first schedule. */
296 set_tsk_thread_flag(p, TIF_FORK);
298 p->thread.fs = me->thread.fs;
299 p->thread.gs = me->thread.gs;
300 p->thread.io_bitmap_ptr = NULL;
/* Snapshot the parent's current segment selectors for the child. */
302 savesegment(gs, p->thread.gsindex);
303 savesegment(fs, p->thread.fsindex);
304 savesegment(es, p->thread.es);
305 savesegment(ds, p->thread.ds);
/* Child starts with no hardware breakpoints installed. */
308 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
/* Duplicate the I/O permission bitmap if the parent has one. */
310 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
311 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
312 if (!p->thread.io_bitmap_ptr) {
313 p->thread.io_bitmap_max = 0;
316 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
318 set_tsk_thread_flag(p, TIF_IO_BITMAP);
322 * Set a new TLS for the child thread?
324 if (clone_flags & CLONE_SETTLS) {
325 #ifdef CONFIG_IA32_EMULATION
/* 32-bit tasks pass a user_desc in %esi; 64-bit pass a base in %r8. */
326 if (test_thread_flag(TIF_IA32))
327 err = do_set_thread_area(p, -1,
328 (struct user_desc __user *)childregs->si, 0);
331 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
/* The child never inherits BTS/debug-store or debugctl MSR state. */
336 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
337 p->thread.ds_ctx = NULL;
339 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
340 p->thread.debugctlmsr = 0;
/* On failure, release the I/O bitmap allocated above. */
344 if (err && p->thread.io_bitmap_ptr) {
345 kfree(p->thread.io_bitmap_ptr);
346 p->thread.io_bitmap_max = 0;
/* NOTE(review): truncated extraction — the `static void` return line,
 * braces, the fs/gs resets and the regs->ip/sp/cs/ss assignments are
 * missing.  Sets up user registers/segments for exec; code left
 * byte-identical. */
353 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
354 unsigned long new_sp,
355 unsigned int _cs, unsigned int _ss, unsigned int _ds)
358 loadsegment(es, _ds);
359 loadsegment(ds, _ds);
/* Keep the per-CPU saved user stack pointer in sync with the new sp. */
363 percpu_write(old_rsp, new_sp);
/* Fresh tasks start with only the interrupt flag set. */
366 regs->flags = X86_EFLAGS_IF;
369 * Free the old FP and other extended state
371 free_thread_xstate(current);
375 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
377 start_thread_common(regs, new_ip, new_sp,
378 __USER_CS, __USER_DS, 0);
#ifdef CONFIG_IA32_EMULATION
/*
 * Same as start_thread(), but for 32-bit compat tasks: 32-bit code
 * segment and 32-bit data segments for ss/ds/es.
 */
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER32_CS, __USER32_DS, __USER32_DS);
}
#endif
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported too.
 */
/* NOTE(review): the context-switch core — extremely order-sensitive and
 * truncated in extraction (braces, `bool preload_fpu`, load_sp0/load_TLS,
 * unlazy_fpu, several else-branches, the return statement are missing).
 * Code left byte-identical; comments describe only what is visible. */
399 __notrace_funcgraph struct task_struct *
400 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
402 struct thread_struct *prev = &prev_p->thread;
403 struct thread_struct *next = &next_p->thread;
404 int cpu = smp_processor_id();
405 struct tss_struct *tss = &per_cpu(init_tss, cpu);
406 unsigned fsindex, gsindex;
410 * If the task has used fpu the last 5 timeslices, just do a full
411 * restore of the math state immediately to avoid the trap; the
412 * chances of needing FPU soon are obviously high now
414 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
416 /* we're going to use this soon, after a few expensive things */
418 prefetch(next->xstate);
421 * Reload esp0, LDT and the page table pointer:
427 * This won't pick up thread selector changes, but I guess that is ok.
/* es/ds: save the outgoing task's selectors; reload only when either
 * side is nonzero (a zero|zero pair needs no reload). */
429 savesegment(es, prev->es);
430 if (unlikely(next->es | prev->es))
431 loadsegment(es, next->es);
433 savesegment(ds, prev->ds);
434 if (unlikely(next->ds | prev->ds))
435 loadsegment(ds, next->ds);
438 /* We must save %fs and %gs before load_TLS() because
439 * %fs and %gs may be cleared by load_TLS().
441 * (e.g. xen_load_tls())
443 savesegment(fs, fsindex);
444 savesegment(gs, gsindex);
448 /* Must be after DS reload */
451 /* Make sure cpu is ready for new context */
456 * Leave lazy mode, flushing any hypercalls made here.
457 * This must be done before restoring TLS segments so
458 * the GDT and LDT are properly updated, and must be
459 * done before math_state_restore, so the TS bit is up
462 arch_end_context_switch(next_p);
467 * Segment register != 0 always requires a reload. Also
468 * reload when it has changed. When prev process used 64bit
469 * base always reload to avoid an information leak.
471 if (unlikely(fsindex | next->fsindex | prev->fs)) {
472 loadsegment(fs, next->fsindex);
474 * Check if the user used a selector != 0; if yes
475 * clear 64bit base, since overloaded base is always
476 * mapped to the Null selector
481 /* when next process has a 64bit base use it */
483 wrmsrl(MSR_FS_BASE, next->fs);
484 prev->fsindex = fsindex;
/* Same dance for gs / MSR_KERNEL_GS_BASE. */
486 if (unlikely(gsindex | next->gsindex | prev->gs)) {
487 load_gs_index(next->gsindex);
492 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
493 prev->gsindex = gsindex;
496 * Switch the PDA and FPU contexts.
498 prev->usersp = percpu_read(old_rsp);
499 percpu_write(old_rsp, next->usersp);
500 percpu_write(current_task, next_p);
/* Point the per-CPU kernel_stack at the incoming task's stack top. */
502 percpu_write(kernel_stack,
503 (unsigned long)task_stack_page(next_p) +
504 THREAD_SIZE - KERNEL_STACK_OFFSET);
507 * Now maybe reload the debug registers and handle I/O bitmaps
509 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
510 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
511 __switch_to_xtra(prev_p, next_p, tss);
514 * Preload the FPU context, now that we've determined that the
515 * task is likely to be using it.
518 __math_state_restore();
524 * sys_execve() executes a new program.
527 long sys_execve(char __user *name, char __user * __user *argv,
528 char __user * __user *envp, struct pt_regs *regs)
533 filename = getname(name);
534 error = PTR_ERR(filename);
535 if (IS_ERR(filename))
537 error = do_execve(filename, argv, envp, regs);
542 void set_personality_64bit(void)
544 /* inherit personality from parent */
546 /* Make sure to be in 64bit mode */
547 clear_thread_flag(TIF_IA32);
549 /* TBD: overwrites user setup. Should have two bits.
550 But 64bit processes have always behaved this way,
551 so it's not too bad. The main problem is just that
552 32bit childs are affected again. */
553 current->personality &= ~READ_IMPLIES_EXEC;
557 sys_clone(unsigned long clone_flags, unsigned long newsp,
558 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
562 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
/* NOTE(review): truncated extraction — braces, the stack/fp/ip/count
 * declarations, the do { opener, the frame-pointer advance and the
 * return statements are missing.  Walks up to 16 stack frames of a
 * sleeping task to find the first non-scheduler return address.
 * Code left byte-identical. */
565 unsigned long get_wchan(struct task_struct *p)
/* Only meaningful for a task that is asleep and is not ourselves. */
571 if (!p || p == current || p->state == TASK_RUNNING)
573 stack = (unsigned long)task_stack_page(p);
/* Saved sp must lie within the task's own kernel stack. */
574 if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
576 fp = *(u64 *)(p->thread.sp);
/* Each frame pointer must also stay inside the stack bounds. */
578 if (fp < (unsigned long)stack ||
579 fp >= (unsigned long)stack+THREAD_SIZE)
582 if (!in_sched_functions(ip))
/* Bound the walk to 16 frames to avoid looping on corruption. */
585 } while (count++ < 16);
/* NOTE(review): truncated extraction — braces, the `int ret = 0` /
 * `cpu` / `base` / `gsindex` declarations, the switch/case labels
 * (ARCH_SET_GS, ARCH_SET_FS, ARCH_GET_FS, ARCH_GET_GS), the put_cpu()
 * calls, error returns and the default case are missing.  Code left
 * byte-identical; comments describe the visible fragments. */
589 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
/* Only apply CPU-state changes immediately when acting on ourselves. */
592 int doit = task == current;
/* --- ARCH_SET_GS fragment: base must lie inside the task's VA space. */
597 if (addr >= TASK_SIZE_OF(task))
600 /* handle small bases via the GDT because that's faster to
/* Bases <= 4GB fit a 32-bit TLS descriptor; larger ones need the MSR. */
602 if (addr <= 0xffffffff) {
603 set_32bit_tls(task, GS_TLS, addr);
605 load_TLS(&task->thread, cpu);
606 load_gs_index(GS_TLS_SEL);
608 task->thread.gsindex = GS_TLS_SEL;
611 task->thread.gsindex = 0;
612 task->thread.gs = addr;
615 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
/* --- ARCH_SET_FS fragment: mirror of the GS path above. */
621 /* Not strictly needed for fs, but do it for symmetry
623 if (addr >= TASK_SIZE_OF(task))
626 /* handle small bases via the GDT because that's faster to
628 if (addr <= 0xffffffff) {
629 set_32bit_tls(task, FS_TLS, addr);
631 load_TLS(&task->thread, cpu);
632 loadsegment(fs, FS_TLS_SEL);
634 task->thread.fsindex = FS_TLS_SEL;
637 task->thread.fsindex = 0;
638 task->thread.fs = addr;
640 /* set the selector to 0 to not confuse
643 ret = checking_wrmsrl(MSR_FS_BASE, addr);
/* --- ARCH_GET_FS fragment: report the base back through *addr. */
650 if (task->thread.fsindex == FS_TLS_SEL)
651 base = read_32bit_tls(task, FS_TLS);
653 rdmsrl(MSR_FS_BASE, base);
655 base = task->thread.fs;
656 ret = put_user(base, (unsigned long __user *)addr);
/* --- ARCH_GET_GS fragment: same, consulting the live MSR when the
 * task is current and its gs selector is in use. */
662 if (task->thread.gsindex == GS_TLS_SEL)
663 base = read_32bit_tls(task, GS_TLS);
665 savesegment(gs, gsindex);
667 rdmsrl(MSR_KERNEL_GS_BASE, base);
669 base = task->thread.gs;
671 base = task->thread.gs;
672 ret = put_user(base, (unsigned long __user *)addr);
684 long sys_arch_prctl(int code, unsigned long addr)
686 return do_arch_prctl(current, code, addr);
689 unsigned long KSTK_ESP(struct task_struct *task)
691 return (test_tsk_thread_flag(task, TIF_IA32)) ?
692 (task_pt_regs(task)->sp) : ((task)->thread.usersp);