* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
- * 28(%esp) - orig_eax
- * 2C(%esp) - %eip
- * 30(%esp) - %cs
- * 34(%esp) - %eflags
- * 38(%esp) - %oldesp
- * 3C(%esp) - %oldss
+ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
-#include <asm/page.h>
-#include <asm/desc.h>
+#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
+#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysenter_audit syscall_trace_entry
+#define sysexit_audit syscall_exit_work
+#endif
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
-#define resume_kernel restore_nocheck
+#define resume_kernel restore_all
#endif
.macro TRACE_IRQS_IRET
#define resume_userspace_sig resume_userspace
#endif
-#define SAVE_ALL \
- cld; \
- pushl %fs; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET fs, 0;*/\
- pushl %es; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET es, 0;*/\
- pushl %ds; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET ds, 0;*/\
- pushl %eax; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET eax, 0;\
- pushl %ebp; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ebp, 0;\
- pushl %edi; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET edi, 0;\
- pushl %esi; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET esi, 0;\
- pushl %edx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET edx, 0;\
- pushl %ecx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ecx, 0;\
- pushl %ebx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ebx, 0;\
- movl $(__USER_DS), %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
- movl $(__KERNEL_PERCPU), %edx; \
+/*
+ * User gs save/restore
+ *
+ * %gs is used for userland TLS and kernel only uses it for stack
+ * canary which is required to be at %gs:20 by gcc. Read the comment
+ * at the top of stackprotector.h for more info.
+ *
+ * Local labels 98 and 99 are used.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+
+ /* unfortunately push/pop can't be no-op */
+.macro PUSH_GS
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+.endm
+.macro POP_GS pop=0
+ addl $(4 + \pop), %esp
+ CFI_ADJUST_CFA_OFFSET -(4 + \pop)
+.endm
+.macro POP_GS_EX
+.endm
+
+ /* all the rest are no-op */
+.macro PTGS_TO_GS
+.endm
+.macro PTGS_TO_GS_EX
+.endm
+.macro GS_TO_REG reg
+.endm
+.macro REG_TO_PTGS reg
+.endm
+.macro SET_KERNEL_GS reg
+.endm
+
+#else /* CONFIG_X86_32_LAZY_GS */
+
+.macro PUSH_GS
+ pushl %gs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET gs, 0*/
+.endm
+
+.macro POP_GS pop=0
+98: popl %gs
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE gs*/
+ .if \pop <> 0
+ add $\pop, %esp
+ CFI_ADJUST_CFA_OFFSET -\pop
+ .endif
+.endm
+.macro POP_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, (%esp)
+ jmp 98b
+.section __ex_table, "a"
+ .align 4
+ .long 98b, 99b
+.popsection
+.endm
+
+.macro PTGS_TO_GS
+98: mov PT_GS(%esp), %gs
+.endm
+.macro PTGS_TO_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, PT_GS(%esp)
+ jmp 98b
+.section __ex_table, "a"
+ .align 4
+ .long 98b, 99b
+.popsection
+.endm
+
+.macro GS_TO_REG reg
+ movl %gs, \reg
+ /*CFI_REGISTER gs, \reg*/
+.endm
+.macro REG_TO_PTGS reg
+ movl \reg, PT_GS(%esp)
+ /*CFI_REL_OFFSET gs, PT_GS*/
+.endm
+.macro SET_KERNEL_GS reg
+ movl $(__KERNEL_STACK_CANARY), \reg
+ movl \reg, %gs
+.endm
+
+#endif /* CONFIG_X86_32_LAZY_GS */
+
+.macro SAVE_ALL
+ cld
+ PUSH_GS
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0;*/
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0;*/
+ pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0;*/
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
+ pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
+ pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
+ movl $(__USER_DS), %edx
+ movl %edx, %ds
+ movl %edx, %es
+ movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
+ SET_KERNEL_GS %edx
+.endm
-#define RESTORE_INT_REGS \
- popl %ebx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ebx;\
- popl %ecx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ecx;\
- popl %edx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE edx;\
- popl %esi; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE esi;\
- popl %edi; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE edi;\
- popl %ebp; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ebp;\
- popl %eax; \
- CFI_ADJUST_CFA_OFFSET -4;\
+.macro RESTORE_INT_REGS
+ popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ecx
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edx
+ popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
+ popl %edi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edi
+ popl %ebp
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebp
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE eax
+.endm
-#define RESTORE_REGS \
- RESTORE_INT_REGS; \
-1: popl %ds; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE ds;*/\
-2: popl %es; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE es;*/\
-3: popl %fs; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE fs;*/\
-.pushsection .fixup,"ax"; \
-4: movl $0,(%esp); \
- jmp 1b; \
-5: movl $0,(%esp); \
- jmp 2b; \
-6: movl $0,(%esp); \
- jmp 3b; \
-.section __ex_table,"a";\
- .align 4; \
- .long 1b,4b; \
- .long 2b,5b; \
- .long 3b,6b; \
+.macro RESTORE_REGS pop=0
+ RESTORE_INT_REGS
+1: popl %ds
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE ds;*/
+2: popl %es
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE es;*/
+3: popl %fs
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE fs;*/
+ POP_GS \pop
+.pushsection .fixup, "ax"
+4: movl $0, (%esp)
+ jmp 1b
+5: movl $0, (%esp)
+ jmp 2b
+6: movl $0, (%esp)
+ jmp 3b
+.section __ex_table, "a"
+ .align 4
+ .long 1b, 4b
+ .long 2b, 5b
+ .long 3b, 6b
.popsection
+ POP_GS_EX
+.endm
-#define RING0_INT_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, 3*4;\
- /*CFI_OFFSET cs, -2*4;*/\
+.macro RING0_INT_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, 3*4
+ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
+.endm
-#define RING0_EC_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, 4*4;\
- /*CFI_OFFSET cs, -2*4;*/\
+.macro RING0_EC_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, 4*4
+ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
+.endm
-#define RING0_PTREGS_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
- CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
- CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
- CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
- CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
- CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
- CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
- CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
+.macro RING0_PTREGS_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
+ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
+ CFI_OFFSET eip, PT_EIP-PT_OLDESP
+ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
+ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
+ CFI_OFFSET eax, PT_EAX-PT_OLDESP
+ CFI_OFFSET ebp, PT_EBP-PT_OLDESP
+ CFI_OFFSET edi, PT_EDI-PT_OLDESP
+ CFI_OFFSET esi, PT_ESI-PT_OLDESP
+ CFI_OFFSET edx, PT_EDX-PT_OLDESP
+ CFI_OFFSET ecx, PT_ECX-PT_OLDESP
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
+.endm
ENTRY(ret_from_fork)
CFI_STARTPROC
END(ret_from_fork)
/*
+ * Interrupt exit functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+/*
* Return to user mode is not as complex as all this looks,
* but we want the default path for a system call return to
* go as quickly as possible which is why some of this is
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_nocheck
+ jnz restore_all
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
END(resume_kernel)
#endif
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
GET_THREAD_INFO(%ebp)
- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
- jnz syscall_trace_entry
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
+ jnz sysenter_audit
+sysenter_do_call:
cmpl $(nr_syscalls), %eax
jae syscall_badsys
call *sys_call_table(,%eax,4)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
- testw $_TIF_ALLWORK_MASK, %cx
- jne syscall_exit_work
+ testl $_TIF_ALLWORK_MASK, %ecx
+ jne sysexit_audit
+sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
movl PT_OLDESP(%esp), %ecx
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
+ PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT
+
+#ifdef CONFIG_AUDITSYSCALL
+sysenter_audit:
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ addl $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
+ /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
+ /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
+ /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
+ movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
+ movl %eax,%edx /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
+ call audit_syscall_entry
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ movl PT_EAX(%esp),%eax /* reload syscall number */
+ jmp sysenter_do_call
+
+sysexit_audit:
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
+ jne syscall_exit_work
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ movl %eax,%edx /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%eax /* zero-extend that */
+ inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
+ jne syscall_exit_work
+ movl PT_EAX(%esp),%eax /* reload syscall return value */
+ jmp sysenter_exit
+#endif
+
CFI_ENDPROC
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
.align 4
.long 1b,2b
.popsection
+ PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
SAVE_ALL
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
- testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
- jz no_singlestep
- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
-no_singlestep:
movl TI_flags(%ebp), %ecx
- testw $_TIF_ALLWORK_MASK, %cx # current->work
+ testl $_TIF_ALLWORK_MASK, %ecx # current->work
jne syscall_exit_work
restore_all:
+ TRACE_IRQS_IRET
+restore_all_notrace:
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
# are returning to the kernel.
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
- TRACE_IRQS_IRET
-restore_nocheck_notrace:
- RESTORE_REGS
- addl $4, %esp # skip orig_eax/error_code
+ RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
jne restore_nocheck
#endif
- /* If returning to userspace with 16bit stack,
- * try to fix the higher word of ESP, as the CPU
- * won't restore it.
- * This is an "official" bug of all the x86-compatible
- * CPUs, which we can try to work around to make
- * dosemu and wine happy. */
- movl PT_OLDESP(%esp), %eax
- movl %esp, %edx
- call patch_espfix_desc
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+ mov %esp, %edx /* load kernel esp */
+ mov PT_OLDESP(%esp), %eax /* load userspace esp */
+ mov %dx, %ax /* eax: new kernel esp */
+ sub %eax, %edx /* offset (low word is 0) */
+ PER_CPU(gdt_page, %ebx)
+ shr $16, %edx
+ mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
+ mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4
- pushl %eax
+ push %eax /* new kernel esp */
CFI_ADJUST_CFA_OFFSET 4
+ /* Disable interrupts, but do not irqtrace this section: we
+ * will soon execute iret and the tracer was already set to
+ * the irqstate after the iret */
DISABLE_INTERRUPTS(CLBR_EAX)
- TRACE_IRQS_OFF
- lss (%esp), %esp
+ lss (%esp), %esp /* switch to espfix segment */
CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck
CFI_ENDPROC
syscall_trace_entry:
movl $-ENOSYS,PT_EAX(%esp)
movl %esp, %eax
- xorl %edx,%edx
- call do_syscall_trace
- cmpl $0, %eax
- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
- # so must skip actual syscall
- movl PT_ORIG_EAX(%esp), %eax
+ call syscall_trace_enter
+ /* What it returned is what we'll actually use. */
cmpl $(nr_syscalls), %eax
jnae syscall_call
jmp syscall_exit
# perform syscall exit tracing
ALIGN
syscall_exit_work:
- testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+ testl $_TIF_WORK_SYSCALL_EXIT, %ecx
jz work_pending
TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
# schedule() instead
movl %esp, %eax
- movl $1, %edx
- call do_syscall_trace
+ call syscall_trace_leave
jmp resume_userspace
END(syscall_exit_work)
CFI_ENDPROC
jmp resume_userspace
END(syscall_badsys)
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
-#define FIXUP_ESPFIX_STACK \
- /* since we are on a wrong stack, we cant make it a C code :( */ \
- PER_CPU(gdt_page, %ebx); \
- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
- addl %esp, %eax; \
- pushl $__KERNEL_DS; \
- CFI_ADJUST_CFA_OFFSET 4; \
+/*
+ * System calls that need a pt_regs pointer.
+ */
+#define PTREGSCALL0(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL1(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL2(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%ecx; \
+ movl (PT_ECX+4)(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL3(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%eax; \
pushl %eax; \
- CFI_ADJUST_CFA_OFFSET 4; \
- lss (%esp), %esp; \
- CFI_ADJUST_CFA_OFFSET -8;
-#define UNWIND_ESPFIX_STACK \
- movl %ss, %eax; \
- /* see if on espfix stack */ \
- cmpw $__ESPFIX_SS, %ax; \
- jne 27f; \
- movl $__KERNEL_DS, %eax; \
- movl %eax, %ds; \
- movl %eax, %es; \
- /* switch to normal stack */ \
- FIXUP_ESPFIX_STACK; \
-27:;
+ movl PT_EDX(%eax),%ecx; \
+ movl PT_ECX(%eax),%edx; \
+ movl PT_EBX(%eax),%eax; \
+ call sys_##name; \
+ addl $4,%esp; \
+ ret
+
+PTREGSCALL1(iopl)
+PTREGSCALL0(fork)
+PTREGSCALL0(vfork)
+PTREGSCALL3(execve)
+PTREGSCALL2(sigaltstack)
+PTREGSCALL0(sigreturn)
+PTREGSCALL0(rt_sigreturn)
+PTREGSCALL2(vm86)
+PTREGSCALL1(vm86old)
+
+/* Clone is an oddball. The 4th arg is in %edi */
+ ALIGN;
+ptregs_clone:
+ leal 4(%esp),%eax
+ pushl %eax
+ pushl PT_EDI(%eax)
+ movl PT_EDX(%eax),%ecx
+ movl PT_ECX(%eax),%edx
+ movl PT_EBX(%eax),%eax
+ call sys_clone
+ addl $8,%esp
+ ret
+
+.macro FIXUP_ESPFIX_STACK
+/*
+ * Switch back for ESPFIX stack to the normal zerobased stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+ /* fixup the stack */
+ PER_CPU(gdt_page, %ebx)
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+ shl $16, %eax
+ addl %esp, %eax /* the adjusted stack pointer */
+ pushl $__KERNEL_DS
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ lss (%esp), %esp /* switch to the normal stack segment */
+ CFI_ADJUST_CFA_OFFSET -8
+.endm
+.macro UNWIND_ESPFIX_STACK
+ movl %ss, %eax
+ /* see if on espfix stack */
+ cmpw $__ESPFIX_SS, %ax
+ jne 27f
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ /* switch to normal stack */
+ FIXUP_ESPFIX_STACK
+27:
+.endm
/*
- * Build the entry stubs and pointer table with
- * some assembler magic.
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
-.section .rodata,"a"
+.section .init.rodata,"a"
ENTRY(interrupt)
.text
-
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
RING0_INT_FRAME
-vector=0
-.rept NR_IRQS
- ALIGN
- .if vector
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
- .endif
-1: pushl $~(vector)
+ .endif
+1: pushl $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 4
- jmp common_interrupt
- .previous
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
.long 1b
- .text
+ .text
vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
.endr
END(irq_entries_start)
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
*/
- ALIGN
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp,%eax
ENDPROC(common_interrupt)
CFI_ENDPROC
-#define BUILD_INTERRUPT(name, nr) \
+/*
+ * Irq entries should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
pushl $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
- call smp_##name; \
+ call fn; \
jmp ret_from_intr; \
CFI_ENDPROC; \
ENDPROC(name)
-/* The include is where all of the SMP etc. interrupts come from */
-#include "entry_arch.h"
+#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
- CFI_ADJUST_CFA_OFFSET 4
- ALIGN
-error_code:
- /* the function address is in %fs's slot on the stack */
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET es, 0*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET ds, 0*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ebx, 0
- cld
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET fs, 0*/
- movl $(__KERNEL_PERCPU), %ecx
- movl %ecx, %fs
- UNWIND_ESPFIX_STACK
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- /*CFI_REGISTER es, ecx*/
- movl PT_FS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- mov %ecx, PT_FS(%esp)
- /*CFI_REL_OFFSET fs, ES*/
- movl $(__USER_DS), %ecx
- movl %ecx, %ds
- movl %ecx, %es
- movl %esp,%eax # pt_regs pointer
- call *%edi
- jmp ret_from_exception
- CFI_ENDPROC
-KPROBE_END(page_fault)
+/* The include is where all of the SMP etc. interrupts come from */
+#include <asm/entry_arch.h>
ENTRY(coprocessor_error)
RING0_INT_FRAME
RING0_INT_FRAME
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- GET_CR0_INTO_EAX
- testl $0x4, %eax # EM (math emulation bit)
- jne device_not_available_emulate
- preempt_stop(CLBR_ANY)
- call math_state_restore
- jmp ret_from_exception
-device_not_available_emulate:
- pushl $0 # temporary storage for ORIG_EIP
+ pushl $do_device_not_available
CFI_ADJUST_CFA_OFFSET 4
- call math_emulate
- addl $4, %esp
- CFI_ADJUST_CFA_OFFSET -4
- jmp ret_from_exception
+ jmp error_code
CFI_ENDPROC
END(device_not_available)
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-#define FIX_STACK(offset, ok, label) \
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
-label: \
- movl TSS_sysenter_sp0+offset(%esp),%esp; \
- CFI_DEF_CFA esp, 0; \
- CFI_UNDEFINED eip; \
- pushfl; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $__KERNEL_CS; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $sysenter_past_esp; \
- CFI_ADJUST_CFA_OFFSET 4; \
- CFI_REL_OFFSET eip, 0
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_iret)
+ iret
+.section __ex_table,"a"
+ .align 4
+ .long native_iret, iret_exc
+.previous
+END(native_iret)
+
+ENTRY(native_irq_enable_sysexit)
+ sti
+ sysexit
+END(native_irq_enable_sysexit)
+#endif
-KPROBE_ENTRY(debug)
+ENTRY(overflow)
RING0_INT_FRAME
- cmpl $ia32_sysenter_target,(%esp)
- jne debug_stack_correct
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
-debug_stack_correct:
- pushl $-1 # mark this as an int
+ pushl $0
CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- xorl %edx,%edx # error code 0
- movl %esp,%eax # pt_regs pointer
- call do_debug
- jmp ret_from_exception
+ pushl $do_overflow
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
CFI_ENDPROC
-KPROBE_END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-KPROBE_ENTRY(nmi)
- RING0_INT_FRAME
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- movl %ss, %eax
- cmpw $__ESPFIX_SS, %ax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- je nmi_espfix_stack
- cmpl $ia32_sysenter_target,(%esp)
- je nmi_stack_fixup
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- movl %esp,%eax
- /* Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1),%eax
- cmpl $(THREAD_SIZE-20),%eax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- jae nmi_stack_correct
- cmpl $ia32_sysenter_target,12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- /* We have a RING0_INT_FRAME here */
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_nmi
- jmp restore_nocheck_notrace
- CFI_ENDPROC
-
-nmi_stack_fixup:
- RING0_INT_FRAME
- FIX_STACK(12,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- /* We have a RING0_INT_FRAME here */
- cmpw $__KERNEL_CS,16(%esp)
- jne nmi_stack_correct
- cmpl $debug,(%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn,(%esp)
- ja nmi_stack_correct
- FIX_STACK(24,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-
-nmi_espfix_stack:
- /* We have a RING0_INT_FRAME here.
- *
- * create the pointer to lss back
- */
- pushl %ss
- CFI_ADJUST_CFA_OFFSET 4
- pushl %esp
- CFI_ADJUST_CFA_OFFSET 4
- addw $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl 16(%esp)
- CFI_ADJUST_CFA_OFFSET 4
- .endr
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- FIXUP_ESPFIX_STACK # %eax == %esp
- xorl %edx,%edx # zero error code
- call do_nmi
- RESTORE_REGS
- lss 12+4(%esp), %esp # back to espfix stack
- CFI_ADJUST_CFA_OFFSET -24
- jmp irq_return
- CFI_ENDPROC
-KPROBE_END(nmi)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
- iret
-.section __ex_table,"a"
- .align 4
- .long native_iret, iret_exc
-.previous
-END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
-#endif
-
-KPROBE_ENTRY(int3)
- RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
- CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
- RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_overflow
- CFI_ADJUST_CFA_OFFSET 4
- jmp error_code
- CFI_ENDPROC
-END(overflow)
+END(overflow)
ENTRY(bounds)
RING0_INT_FRAME
CFI_ENDPROC
END(stack_segment)
-KPROBE_ENTRY(general_protection)
- RING0_EC_FRAME
- pushl $do_general_protection
- CFI_ADJUST_CFA_OFFSET 4
- jmp error_code
- CFI_ENDPROC
-KPROBE_END(general_protection)
-
ENTRY(alignment_check)
RING0_EC_FRAME
pushl $do_alignment_check
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
+/*
+ * End of kprobes section
+ */
+ .popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
CFI_STARTPROC
- movl %edx,%eax
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
- call *%ebx
- push %eax
- CFI_ADJUST_CFA_OFFSET 4
+ movl %edi,%eax
+ call *%esi
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
ENTRY(xen_sysenter_target)
RING0_INT_FRAME
addl $5*4, %esp /* remove xen-provided frame */
+ CFI_ADJUST_CFA_OFFSET -5*4
jmp sysenter_past_esp
+ CFI_ENDPROC
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
#endif /* CONFIG_XEN */
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+ ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
+
+.globl ftrace_stub
+ftrace_stub:
+ ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ cmpl $ftrace_stub, ftrace_trace_function
+ jnz trace
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpl $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+
+ cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
+ jnz ftrace_graph_caller
+#endif
+.globl ftrace_stub
+ftrace_stub:
+ ret
+
+ /* taken from glibc */
+trace:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+ call *ftrace_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx
+ lea 0x4(%ebp), %eax
+ movl (%ebp), %ecx
+ subl $MCOUNT_INSN_SIZE, %edx
+ call prepare_ftrace_return
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+END(ftrace_graph_caller)
+
+.globl return_to_handler
+return_to_handler:
+ pushl %eax
+ pushl %edx
+ movl %ebp, %eax
+ call ftrace_return_to_handler
+ movl %eax, %ecx
+ popl %edx
+ popl %eax
+ jmp *%ecx
+#endif
+
.section .rodata,"a"
#include "syscall_table_32.S"
syscall_table_size=(.-sys_call_table)
+
+/*
+ * Some functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+
+ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
+ ALIGN
+error_code:
+ /* the function address is in %gs's slot on the stack */
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0*/
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
+ pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
+ pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
+ pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
+ cld
+ movl $(__KERNEL_PERCPU), %ecx
+ movl %ecx, %fs
+ UNWIND_ESPFIX_STACK
+ GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi # get the function address
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+ movl $(__USER_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ TRACE_IRQS_OFF
+ movl %esp,%eax # pt_regs pointer
+ call *%edi
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(page_fault)
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+.macro FIX_STACK offset ok label
+ cmpw $__KERNEL_CS, 4(%esp)
+ jne \ok
+\label:
+ movl TSS_sysenter_sp0 + \offset(%esp), %esp
+ CFI_DEF_CFA esp, 0
+ CFI_UNDEFINED eip
+ pushfl
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $__KERNEL_CS
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $sysenter_past_esp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
+.endm
+
+ENTRY(debug)
+ RING0_INT_FRAME
+ cmpl $ia32_sysenter_target,(%esp)
+ jne debug_stack_correct
+ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
+debug_stack_correct:
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx,%edx # error code 0
+ movl %esp,%eax # pt_regs pointer
+ call do_debug
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(debug)
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %ss, %eax
+ cmpw $__ESPFIX_SS, %ax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ je nmi_espfix_stack
+ cmpl $ia32_sysenter_target,(%esp)
+ je nmi_stack_fixup
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %esp,%eax
+ /* Do not access memory above the end of our stack page,
+ * it might not exist.
+ */
+ andl $(THREAD_SIZE-1),%eax
+ cmpl $(THREAD_SIZE-20),%eax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ jae nmi_stack_correct
+ cmpl $ia32_sysenter_target,12(%esp)
+ je nmi_debug_stack_check
+nmi_stack_correct:
+ /* We have a RING0_INT_FRAME here */
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ jmp restore_all_notrace
+ CFI_ENDPROC
+
+nmi_stack_fixup:
+ RING0_INT_FRAME
+ FIX_STACK 12, nmi_stack_correct, 1
+ jmp nmi_stack_correct
+
+nmi_debug_stack_check:
+ /* We have a RING0_INT_FRAME here */
+ cmpw $__KERNEL_CS,16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug,(%esp)
+ jb nmi_stack_correct
+ cmpl $debug_esp_fix_insn,(%esp)
+ ja nmi_stack_correct
+ FIX_STACK 24, nmi_stack_correct, 1
+ jmp nmi_stack_correct
+
+nmi_espfix_stack:
+ /* We have a RING0_INT_FRAME here.
+ *
+ * create the pointer to lss back
+ */
+ pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
+ addl $4, (%esp)
+ /* copy the iret frame of 12 bytes */
+ .rept 3
+ pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ .endr
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ FIXUP_ESPFIX_STACK # %eax == %esp
+ xorl %edx,%edx # zero error code
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to espfix stack
+ CFI_ADJUST_CFA_OFFSET -24
+ jmp irq_return
+ CFI_ENDPROC
+END(nmi)
+
+ENTRY(int3)
+ RING0_INT_FRAME
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_int3
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(int3)
+
+ENTRY(general_protection)
+ RING0_EC_FRAME
+ pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+END(general_protection)
+
+/*
+ * End of kprobes section
+ */
+ .popsection