* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- *
- * $Id$
*/
/*
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
* - full stack frame: Like partial stack frame, but all register saved.
- *
- * TODO:
- * - schedule it carefully for the final hardware.
+ *
+ * Some macro usage:
+ * - CFI macros are used to generate dwarf2 unwind information for better
+ * backtraces. They don't change any code.
+ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
+ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
+ * There are unfortunately lots of special cases where some registers
+ * not touched. The macro is a big mess that should be cleaned up.
+ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
+ * Gives a full stack frame.
+ * - ENTRY/END Define functions in the symbol table.
+ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
+ * frame that is otherwise undefined after a SYSCALL
+ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
+ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
*/
#include <linux/linkage.h>
.macro CFI_DEFAULT_STACK start=1
.if \start
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8
.else
CFI_DEF_CFA_OFFSET SS+8
/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
+ push kernel_eflags(%rip)
+ CFI_ADJUST_CFA_OFFSET 4
+ popf # reset kernel eflags
+ CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
ENTRY(system_call)
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
- .globl ret_from_sys_call
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
TRACE_IRQS_OFF
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
- CFI_REMEMBER_STATE
jnz sysret_careful
+ CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
*/
swapgs
sysretq
+ CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
- CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
sysret_signal:
TRACE_IRQS_ON
sti
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz 1f
/* Really a signal */
/* Do syscall tracing */
tracesys:
- CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
+ movq $-ENOSYS,%rcx
+ cmova %rcx,%rax
ja 1f
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
1: movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(system_call)
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
- */
-ENTRY(int_ret_from_sys_call)
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
- CFI_REL_OFFSET r8,R8-ARGOFFSET
- CFI_REL_OFFSET r9,R9-ARGOFFSET
- CFI_REL_OFFSET r10,R10-ARGOFFSET
- CFI_REL_OFFSET r11,R11-ARGOFFSET
+ */
+ .globl int_ret_from_sys_call
+int_ret_from_sys_call:
cli
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
- cli
- TRACE_IRQS_OFF
jmp int_restore_rest
int_signal:
- testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
-END(int_ret_from_sys_call)
+END(system_call)
/*
* Certain special system calls that need to save a complete full stack frame.
*/
.macro _frame ref
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
testl $3,CS(%rdi)
je 1f
swapgs
-1: incl %gs:pda_irqcount # RED-PEN should check preempt count
+ /* irqcount is used to check if a CPU is already on an interrupt
+ stack or not. While this is essentially redundant with preempt_count
+ it is a little cheaper to use a separate counter in the PDA
+ (short of moving irq_enter into assembly, which would be too
+ much work) */
+1: incl %gs:pda_irqcount
cmoveq %gs:pda_irqstackptr,%rsp
push %rbp # backlink for old unwinder
/*
jmp retint_check
retint_signal:
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz retint_swapgs
TRACE_IRQS_ON
sti
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
- .p2align
-retint_kernel:
+ENTRY(retint_kernel)
cmpl $0,threadinfo_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(irq_move_cleanup_interrupt)
+ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
+END(irq_move_cleanup_interrupt)
#endif
-#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
-#endif
/*
* Exception entry points.
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
XCPT_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
testl $3,CS(%rsp)
jnz paranoid_userspace\trace
paranoid_swapgs\trace:
+ .if \trace
TRACE_IRQS_IRETQ 0
+ .endif
swapgs
paranoid_restore\trace:
RESTORE_ALL 8
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
-ENTRY(error_entry)
+KPROBE_ENTRY(error_entry)
_frame RDI
+ CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
+ CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
swapgs
error_sti:
movq %rdi,RDI(%rsp)
+ CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
-END(error_entry)
+KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
*/
-ENTRY(execve)
+ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(execve)
+ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
-END(page_fault)
- .previous .text
+KPROBE_END(page_fault)
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
paranoidexit
-END(debug)
- .previous .text
+KPROBE_END(debug)
/* runs on exception stack */
KPROBE_ENTRY(nmi)
jmp paranoid_exit1
CFI_ENDPROC
#endif
-END(nmi)
- .previous .text
+KPROBE_END(nmi)
KPROBE_ENTRY(int3)
INTR_FRAME
paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit1
CFI_ENDPROC
-END(int3)
- .previous .text
+KPROBE_END(int3)
ENTRY(overflow)
zeroentry do_overflow
KPROBE_ENTRY(general_protection)
errorentry do_general_protection
-END(general_protection)
- .previous .text
+KPROBE_END(general_protection)
ENTRY(alignment_check)
errorentry do_alignment_check
CFI_ENDPROC
ENDPROC(call_softirq)
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
+KPROBE_ENTRY(ignore_sysret)
CFI_STARTPROC
- movq %r15, R15(%rdi)
- movq %r14, R14(%rdi)
- xchgq %rsi, %rdx
- movq %r13, R13(%rdi)
- movq %r12, R12(%rdi)
- xorl %eax, %eax
- movq %rbp, RBP(%rdi)
- movq %rbx, RBX(%rdi)
- movq (%rsp), %rcx
- movq %rax, R11(%rdi)
- movq %rax, R10(%rdi)
- movq %rax, R9(%rdi)
- movq %rax, R8(%rdi)
- movq %rax, RAX(%rdi)
- movq %rax, RCX(%rdi)
- movq %rax, RDX(%rdi)
- movq %rax, RSI(%rdi)
- movq %rax, RDI(%rdi)
- movq %rax, ORIG_RAX(%rdi)
- movq %rcx, RIP(%rdi)
- leaq 8(%rsp), %rcx
- movq $__KERNEL_CS, CS(%rdi)
- movq %rax, EFLAGS(%rdi)
- movq %rcx, RSP(%rdi)
- movq $__KERNEL_DS, SS(%rdi)
- jmpq *%rdx
+ mov $-ENOSYS,%eax
+ sysret
CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
+ENDPROC(ignore_sysret)