From: Ingo Molnar
Date: Wed, 31 Dec 2008 07:31:57 +0000 (+0100)
Subject: Merge branch 'linus' into stackprotector
X-Git-Tag: v2.6.30-rc1~2^2~50^2~67^2~1
X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=commitdiff_plain;h=a9de18eb761f7c1c860964b2e5addc1a35c7e861

Merge branch 'linus' into stackprotector

Conflicts:
	arch/x86/include/asm/pda.h
	kernel/fork.c
---

a9de18eb761f7c1c860964b2e5addc1a35c7e861
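For context on the pda.h conflict resolved below: with CONFIG_CC_STACKPROTECTOR, gcc's -fstack-protector on x86-64 kernel builds fetches the reference canary from the fixed location %gs:40, which is why struct x8664_pda must keep stack_canary at offset 40. The sketch that follows is illustrative only (not part of this commit); the function is made up and the generated assembly is approximate.

/* Illustrative only: a function gcc would instrument under
 * -fstack-protector.  On x86-64 kernel builds the canary is read from
 * %gs:40, i.e. offset 40 of struct x8664_pda in the diff below. */
#include <string.h>

void copy_name(char *dst, const char *src)
{
	char buf[64];

	strncpy(buf, src, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	strcpy(dst, buf);
}

/*
 * Approximate instrumentation gcc adds around the body (a sketch, not
 * taken from a real build):
 *
 *	movq	%gs:40, %rax		# load reference canary from the PDA
 *	movq	%rax, 72(%rsp)		# park it just below the return address
 *	...				# function body
 *	movq	72(%rsp), %rdx
 *	xorq	%gs:40, %rdx		# still intact?
 *	jne	1f			# no -> __stack_chk_fail()
 *	ret
 * 1:	call	__stack_chk_fail
 */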
diff --cc arch/x86/include/asm/pda.h
index 0000000,2fbfff8..3fea2fd
mode 000000,100644..100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@@ -1,0 -1,137 +1,137 @@@
+ #ifndef _ASM_X86_PDA_H
+ #define _ASM_X86_PDA_H
+ 
+ #ifndef __ASSEMBLY__
+ #include
+ #include
+ #include
+ #include
+ 
+ /* Per processor datastructure. %gs points to it while the kernel runs */
+ struct x8664_pda {
+ 	struct task_struct *pcurrent;	/* 0  Current process */
+ 	unsigned long data_offset;	/* 8 Per cpu data offset from linker
+ 					   address */
+ 	unsigned long kernelstack;	/* 16 top of kernel stack for current */
+ 	unsigned long oldrsp;		/* 24 user rsp for system call */
+ 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
+ 	unsigned int cpunumber;		/* 36 Logical CPU number */
 -#ifdef CONFIG_CC_STACKPROTECTOR
+ 	unsigned long stack_canary;	/* 40 stack canary value */
+ 					/* gcc-ABI: this canary MUST be at
+ 					   offset 40!!! */
 -#endif
+ 	char *irqstackptr;
+ 	short nodenumber;		/* number of current node (32k max) */
+ 	short in_bootmem;		/* pda lives in bootmem */
+ 	unsigned int __softirq_pending;
+ 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
+ 	short mmu_state;
+ 	short isidle;
+ 	struct mm_struct *active_mm;
+ 	unsigned apic_timer_irqs;
+ 	unsigned irq0_irqs;
+ 	unsigned irq_resched_count;
+ 	unsigned irq_call_count;
+ 	unsigned irq_tlb_count;
+ 	unsigned irq_thermal_count;
+ 	unsigned irq_threshold_count;
+ 	unsigned irq_spurious_count;
+ } ____cacheline_aligned_in_smp;
+ 
+ extern struct x8664_pda **_cpu_pda;
+ extern void pda_init(int);
+ 
+ #define cpu_pda(i) (_cpu_pda[i])
+ 
+ /*
+  * There is no fast way to get the base address of the PDA, all the accesses
+  * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
+  */
+ extern void __bad_pda_field(void) __attribute__((noreturn));
+ 
+ /*
+  * proxy_pda doesn't actually exist, but tell gcc it is accessed for
+  * all PDA accesses so it gets read/write dependencies right.
+  */
+ extern struct x8664_pda _proxy_pda;
+ 
+ #define pda_offset(field) offsetof(struct x8664_pda, field)
+ 
+ #define pda_to_op(op, field, val)					\
+ do {									\
+ 	typedef typeof(_proxy_pda.field) T__;				\
+ 	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */	\
+ 	switch (sizeof(_proxy_pda.field)) {				\
+ 	case 2:								\
+ 		asm(op "w %1,%%gs:%c2" :				\
+ 		    "+m" (_proxy_pda.field) :				\
+ 		    "ri" ((T__)val),					\
+ 		    "i"(pda_offset(field)));				\
+ 		break;							\
+ 	case 4:								\
+ 		asm(op "l %1,%%gs:%c2" :				\
+ 		    "+m" (_proxy_pda.field) :				\
+ 		    "ri" ((T__)val),					\
+ 		    "i" (pda_offset(field)));				\
+ 		break;							\
+ 	case 8:								\
+ 		asm(op "q %1,%%gs:%c2":					\
+ 		    "+m" (_proxy_pda.field) :				\
+ 		    "ri" ((T__)val),					\
+ 		    "i"(pda_offset(field)));				\
+ 		break;							\
+ 	default:							\
+ 		__bad_pda_field();					\
+ 	}								\
+ } while (0)
+ 
+ #define pda_from_op(op, field)			\
+ ({						\
+ 	typeof(_proxy_pda.field) ret__;		\
+ 	switch (sizeof(_proxy_pda.field)) {	\
+ 	case 2:					\
+ 		asm(op "w %%gs:%c1,%0" :	\
+ 		    "=r" (ret__) :		\
+ 		    "i" (pda_offset(field)),	\
+ 		    "m" (_proxy_pda.field));	\
+ 		break;				\
+ 	case 4:					\
+ 		asm(op "l %%gs:%c1,%0":		\
+ 		    "=r" (ret__):		\
+ 		    "i" (pda_offset(field)),	\
+ 		    "m" (_proxy_pda.field));	\
+ 		break;				\
+ 	case 8:					\
+ 		asm(op "q %%gs:%c1,%0":		\
+ 		    "=r" (ret__) :		\
+ 		    "i" (pda_offset(field)),	\
+ 		    "m" (_proxy_pda.field));	\
+ 		break;				\
+ 	default:				\
+ 		__bad_pda_field();		\
+ 	}					\
+ 	ret__;					\
+ })
+ 
+ #define read_pda(field)		pda_from_op("mov", field)
+ #define write_pda(field, val)	pda_to_op("mov", field, val)
+ #define add_pda(field, val)	pda_to_op("add", field, val)
+ #define sub_pda(field, val)	pda_to_op("sub", field, val)
+ #define or_pda(field, val)	pda_to_op("or", field, val)
+ 
+ /* This is not atomic against other CPUs -- CPU preemption needs to be off */
+ #define test_and_clear_bit_pda(bit, field)				\
+ ({									\
+ 	int old__;							\
+ 	asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0"			\
+ 		     : "=r" (old__), "+m" (_proxy_pda.field)		\
+ 		     : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
+ 	old__;								\
+ })
+ 
+ #endif
+ 
+ #define PDA_STACKOFFSET (5*8)
+ 
++#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
++
+ #endif /* _ASM_X86_PDA_H */
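The pda_to_op()/pda_from_op() macros above dispatch on sizeof(field) so that one read_pda()/write_pda() works for 2-, 4- and 8-byte members with a single %gs-relative instruction, and the `if (0)` assignment gives compile-time type checking at no runtime cost. The following stand-alone analog shows the same offsetof/sizeof-dispatch idiom with an ordinary global struct instead of a %gs-based PDA; struct demo_pda, the_pda, demo_read() and demo_write() are invented names for illustration only.

/* User-space analog of the PDA accessor pattern: sizeof() dispatch,
 * offsetof()-based field offsets, and an if (0) type check.  A plain
 * global struct stands in for the per-CPU PDA reached through %gs. */
#include <stddef.h>
#include <stdio.h>

struct demo_pda {
	unsigned int cpunumber;
	unsigned long stack_canary;
};

static struct demo_pda the_pda;

#define demo_offset(field) offsetof(struct demo_pda, field)

#define demo_write(field, val)						\
do {									\
	typedef typeof(the_pda.field) T__;				\
	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */	\
	switch (sizeof(the_pda.field)) {				\
	case 2:	/* the kernel emits "movw ...,%gs:offset" here */	\
	case 4:	/* ... "movl" */					\
	case 8:	/* ... "movq" */					\
		the_pda.field = (val);					\
		break;							\
	default:							\
		/* the kernel calls __bad_pda_field() here */		\
		break;							\
	}								\
} while (0)

#define demo_read(field)						\
({									\
	typeof(the_pda.field) ret__;					\
	ret__ = the_pda.field;	/* kernel: one %gs-relative mov */	\
	ret__;								\
})

int main(void)
{
	demo_write(cpunumber, 3u);
	demo_write(stack_canary, 0xdeadbeefUL);
	printf("cpunumber    at offset %zu = %u\n",
	       demo_offset(cpunumber), demo_read(cpunumber));
	printf("stack_canary at offset %zu = %#lx\n",
	       demo_offset(stack_canary), demo_read(stack_canary));
	return 0;
}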
diff --cc arch/x86/include/asm/system.h
index 0000000,8e626ea..2f6340a
mode 000000,100644..100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@@ -1,0 -1,427 +1,431 @@@
+ #ifndef _ASM_X86_SYSTEM_H
+ #define _ASM_X86_SYSTEM_H
+ 
+ #include
+ #include
+ #include
+ #include
+ #include
+ 
+ #include
+ #include
+ 
+ /* entries in ARCH_DLINFO: */
+ #ifdef CONFIG_IA32_EMULATION
+ # define AT_VECTOR_SIZE_ARCH 2
+ #else
+ # define AT_VECTOR_SIZE_ARCH 1
+ #endif
+ 
+ struct task_struct; /* one of the stranger aspects of C forward declarations */
+ struct task_struct *__switch_to(struct task_struct *prev,
+ 				struct task_struct *next);
+ 
+ #ifdef CONFIG_X86_32
+ 
+ /*
+  * Saving eflags is important. It switches not only IOPL between tasks,
+  * it also protects other tasks from NT leaking through sysenter etc.
+  */
+ #define switch_to(prev, next, last)					\
+ do {									\
+ 	/*								\
+ 	 * Context-switching clobbers all registers, so we clobber	\
+ 	 * them explicitly, via unused output variables.		\
+ 	 * (EAX and EBP is not listed because EBP is saved/restored	\
+ 	 * explicitly for wchan access and EAX is the return value of	\
+ 	 * __switch_to())						\
+ 	 */								\
+ 	unsigned long ebx, ecx, edx, esi, edi;				\
+ 									\
+ 	asm volatile("pushfl\n\t"		/* save flags */	\
+ 		     "pushl %%ebp\n\t"		/* save EBP */		\
+ 		     "movl %%esp,%[prev_sp]\n\t" /* save ESP */		\
+ 		     "movl %[next_sp],%%esp\n\t" /* restore ESP */	\
+ 		     "movl $1f,%[prev_ip]\n\t"	/* save EIP */		\
+ 		     "pushl %[next_ip]\n\t"	/* restore EIP */	\
+ 		     "jmp __switch_to\n"	/* regparm call */	\
+ 		     "1:\t"						\
+ 		     "popl %%ebp\n\t"		/* restore EBP */	\
+ 		     "popfl\n"			/* restore flags */	\
+ 									\
+ 		     /* output parameters */				\
+ 		     : [prev_sp] "=m" (prev->thread.sp),		\
+ 		       [prev_ip] "=m" (prev->thread.ip),		\
+ 		       "=a" (last),					\
+ 									\
+ 		       /* clobbered output registers: */		\
+ 		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
+ 		       "=S" (esi), "=D" (edi)				\
+ 									\
+ 		       /* input parameters: */				\
+ 		     : [next_sp]  "m" (next->thread.sp),		\
+ 		       [next_ip]  "m" (next->thread.ip),		\
+ 									\
+ 		       /* regparm parameters for __switch_to(): */	\
+ 		       [prev]     "a" (prev),				\
+ 		       [next]     "d" (next)				\
+ 									\
+ 		     : /* reloaded segment registers */			\
+ 			"memory");					\
+ } while (0)
+ 
+ /*
+  * disable hlt during certain critical i/o operations
+  */
+ #define HAVE_DISABLE_HLT
+ #else
+ #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
+ #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
+ 
+ /* frame pointer must be last for get_wchan */
+ #define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+ #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+ 
+ #define __EXTRA_CLOBBER  \
+ 	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
+ 	  "r12", "r13", "r14", "r15"
+ 
+ /* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev, next, last) \
+ 	asm volatile(SAVE_CONTEXT					\
+ 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	\
+ 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	\
+ 	     "call __switch_to\n\t"					\
+ 	     ".globl thread_return\n"					\
+ 	     "thread_return:\n\t"					\
+ 	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			\
++	     "movq %P[task_canary](%%rsi),%%r8\n\t"			\
++	     "movq %%r8,%%gs:%P[pda_canary]\n\t"			\
+ 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			\
+ 	     LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t"	\
+ 	     "movq %%rax,%%rdi\n\t"					\
+ 	     "jc ret_from_fork\n\t"					\
+ 	     RESTORE_CONTEXT						\
+ 	     : "=a" (last)						\
+ 	     : [next] "S" (next), [prev] "D" (prev),			\
+ 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+ 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	\
+ 	       [tif_fork] "i" (TIF_FORK),				\
+ 	       [thread_info] "i" (offsetof(struct task_struct, stack)), \
 -	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
++	       [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
++	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \
++	       [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
+ 	     : "memory", "cc" __EXTRA_CLOBBER)
+ #endif
+ 
+ #ifdef __KERNEL__
+ #define _set_base(addr, base) do { unsigned long __pr; \
+ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ 	"rorl $16,%%edx\n\t" \
+ 	"movb %%dl,%2\n\t" \
+ 	"movb %%dh,%3" \
+ 	:"=&d" (__pr) \
+ 	:"m" (*((addr)+2)), \
+ 	 "m" (*((addr)+4)), \
+ 	 "m" (*((addr)+7)), \
+ 	 "0" (base) \
+ 	); } while (0)
+ 
+ #define _set_limit(addr, limit) do { unsigned long __lr; \
+ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ 	"rorl $16,%%edx\n\t" \
+ 	"movb %2,%%dh\n\t" \
+ 	"andb $0xf0,%%dh\n\t" \
+ 	"orb %%dh,%%dl\n\t" \
+ 	"movb %%dl,%2" \
+ 	:"=&d" (__lr) \
+ 	:"m" (*(addr)), \
+ 	 "m" (*((addr)+6)), \
+ 	 "0" (limit) \
+ 	); } while (0)
+ 
+ #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
+ #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
+ 
+ extern void native_load_gs_index(unsigned);
+ 
+ /*
+  * Load a segment. Fall back on loading the zero
+  * segment if something goes wrong..
+  */
+ #define loadsegment(seg, value)			\
+ 	asm volatile("\n"			\
+ 		     "1:\t"			\
+ 		     "movl %k0,%%" #seg "\n"	\
+ 		     "2:\n"			\
+ 		     ".section .fixup,\"ax\"\n"	\
+ 		     "3:\t"			\
+ 		     "movl %k1, %%" #seg "\n\t"	\
+ 		     "jmp 2b\n"			\
+ 		     ".previous\n"		\
+ 		     _ASM_EXTABLE(1b,3b)	\
+ 		     : :"r" (value), "r" (0) : "memory")
+ 
+ 
+ /*
+  * Save a segment register away
+  */
+ #define savesegment(seg, value)				\
+ 	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
+ 
+ static inline unsigned long get_limit(unsigned long segment)
+ {
+ 	unsigned long __limit;
+ 	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
+ 	return __limit + 1;
+ }
+ 
+ static inline void native_clts(void)
+ {
+ 	asm volatile("clts");
+ }
+ 
+ /*
+  * Volatile isn't enough to prevent the compiler from reordering the
+  * read/write functions for the control registers and messing everything up.
+  * A memory clobber would solve the problem, but would prevent reordering of
+  * all loads stores around it, which can hurt performance. Solution is to
+  * use a variable and mimic reads and writes to it to enforce serialization
+  */
+ static unsigned long __force_order;
+ 
+ static inline unsigned long native_read_cr0(void)
+ {
+ 	unsigned long val;
+ 	asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ 	return val;
+ }
+ 
+ static inline void native_write_cr0(unsigned long val)
+ {
+ 	asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+ }
+ 
+ static inline unsigned long native_read_cr2(void)
+ {
+ 	unsigned long val;
+ 	asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ 	return val;
+ }
+ 
+ static inline void native_write_cr2(unsigned long val)
+ {
+ 	asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+ }
+ 
+ static inline unsigned long native_read_cr3(void)
+ {
+ 	unsigned long val;
+ 	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ 	return val;
+ }
+ 
+ static inline void native_write_cr3(unsigned long val)
+ {
+ 	asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ }
+ 
+ static inline unsigned long native_read_cr4(void)
+ {
+ 	unsigned long val;
+ 	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ 	return val;
+ }
+ 
+ static inline unsigned long native_read_cr4_safe(void)
+ {
+ 	unsigned long val;
+ 	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
+ 	 * exists, so it will never fail. */
+ #ifdef CONFIG_X86_32
+ 	asm volatile("1: mov %%cr4, %0\n"
+ 		     "2:\n"
+ 		     _ASM_EXTABLE(1b, 2b)
+ 		     : "=r" (val), "=m" (__force_order) : "0" (0));
+ #else
+ 	val = native_read_cr4();
+ #endif
+ 	return val;
+ }
+ 
+ static inline void native_write_cr4(unsigned long val)
+ {
+ 	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+ }
+ 
+ #ifdef CONFIG_X86_64
+ static inline unsigned long native_read_cr8(void)
+ {
+ 	unsigned long cr8;
+ 	asm volatile("movq %%cr8,%0" : "=r" (cr8));
+ 	return cr8;
+ }
+ 
+ static inline void native_write_cr8(unsigned long val)
+ {
+ 	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+ }
+ #endif
+ 
+ static inline void native_wbinvd(void)
+ {
+ 	asm volatile("wbinvd": : :"memory");
+ }
+ 
+ #ifdef CONFIG_PARAVIRT
+ #include
+ #else
+ #define read_cr0()	(native_read_cr0())
+ #define write_cr0(x)	(native_write_cr0(x))
+ #define read_cr2()	(native_read_cr2())
+ #define write_cr2(x)	(native_write_cr2(x))
+ #define read_cr3()	(native_read_cr3())
+ #define write_cr3(x)	(native_write_cr3(x))
+ #define read_cr4()	(native_read_cr4())
+ #define read_cr4_safe()	(native_read_cr4_safe())
+ #define write_cr4(x)	(native_write_cr4(x))
+ #define wbinvd()	(native_wbinvd())
+ #ifdef CONFIG_X86_64
+ #define read_cr8()	(native_read_cr8())
+ #define write_cr8(x)	(native_write_cr8(x))
+ #define load_gs_index	native_load_gs_index
+ #endif
+ 
+ /* Clear the 'TS' bit */
+ #define clts()		(native_clts())
+ 
+ #endif/* CONFIG_PARAVIRT */
+ 
+ #define stts() write_cr0(read_cr0() | X86_CR0_TS)
+ 
+ #endif /* __KERNEL__ */
+ 
+ static inline void clflush(volatile void *__p)
+ {
+ 	asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
+ }
+ 
+ #define nop() asm volatile ("nop")
+ 
+ void disable_hlt(void);
+ void enable_hlt(void);
+ 
+ void cpu_idle_wait(void);
+ 
+ extern unsigned long arch_align_stack(unsigned long sp);
+ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+ 
+ void default_idle(void);
+ 
+ void stop_this_cpu(void *dummy);
+ 
+ /*
+  * Force strict CPU ordering.
+  * And yes, this is required on UP too when we're talking
+  * to devices.
+  */
+ #ifdef CONFIG_X86_32
+ /*
+  * Some non-Intel clones support out of order store. wmb() ceases to be a
+  * nop for these.
+  */
+ #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+ #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+ #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+ #else
+ #define mb()	asm volatile("mfence":::"memory")
+ #define rmb()	asm volatile("lfence":::"memory")
+ #define wmb()	asm volatile("sfence" ::: "memory")
+ #endif
+ 
+ /**
+  * read_barrier_depends - Flush all pending reads that subsequents reads
+  * depend on.
+  *
+  * No data-dependent reads from memory-like regions are ever reordered
+  * over this barrier.  All reads preceding this primitive are guaranteed
+  * to access memory (but not necessarily other CPUs' caches) before any
+  * reads following this primitive that depend on the data return by
+  * any of the preceding reads.  This primitive is much lighter weight than
+  * rmb() on most CPUs, and is never heavier weight than is
+  * rmb().
+  *
+  * These ordering constraints are respected by both the local CPU
+  * and the compiler.
+  *
+  * Ordering is not guaranteed by anything other than these primitives,
+  * not even by data dependencies.  See the documentation for
+  * memory_barrier() for examples and URLs to more information.
+  *
+  * For example, the following code would force ordering (the initial
+  * value of "a" is zero, "b" is one, and "p" is "&a"):
+  *
+  *	CPU 0				CPU 1
+  *
+  *	b = 2;
+  *	memory_barrier();
+  *	p = &b;				q = p;
+  *					read_barrier_depends();
+  *					d = *q;
+  *
+  * because the read of "*q" depends on the read of "p" and these
+  * two reads are separated by a read_barrier_depends().  However,
+  * the following code, with the same initial values for "a" and "b":
+  *
+  *	CPU 0				CPU 1
+  *
+  *	a = 2;
+  *	memory_barrier();
+  *	b = 3;				y = b;
+  *					read_barrier_depends();
+  *					x = a;
+  *
+  * does not enforce ordering, since there is no data dependency between
+  * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+  * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+  * in cases like this where there are no data dependencies.
+  **/
+ 
+ #define read_barrier_depends()	do { } while (0)
+ 
+ #ifdef CONFIG_SMP
+ #define smp_mb()	mb()
+ #ifdef CONFIG_X86_PPRO_FENCE
+ # define smp_rmb()	rmb()
+ #else
+ # define smp_rmb()	barrier()
+ #endif
+ #ifdef CONFIG_X86_OOSTORE
+ # define smp_wmb()	wmb()
+ #else
+ # define smp_wmb()	barrier()
+ #endif
+ #define smp_read_barrier_depends()	read_barrier_depends()
+ #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+ #else
+ #define smp_mb()	barrier()
+ #define smp_rmb()	barrier()
+ #define smp_wmb()	barrier()
+ #define smp_read_barrier_depends()	do { } while (0)
+ #define set_mb(var, value) do { var = value; barrier(); } while (0)
+ #endif
+ 
+ /*
+  * Stop RDTSC speculation. This is needed when you need to use RDTSC
+  * (or get_cycles or vread that possibly accesses the TSC) in a defined
+  * code region.
+  *
+  * (Could use an alternative three way for this if there was one.)
+  */
+ static inline void rdtsc_barrier(void)
+ {
+ 	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+ 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+ }
+ 
+ #endif /* _ASM_X86_SYSTEM_H */
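The barrier documentation above describes the usual publish/consume pairing: the writer orders its data stores before the flag store (wmb), the reader orders the flag load before the data loads (rmb). The user-space sketch below models that pairing with GCC's __atomic builtins standing in for the kernel's smp_wmb()/smp_rmb(); it is an analogy, not kernel code, and the names are illustrative. Build with: gcc -pthread barrier_demo.c.

/* Producer/consumer ordering analog of smp_wmb()/smp_rmb(). */
#include <pthread.h>
#include <stdio.h>

static int payload;
static int ready;

static void *producer(void *arg)
{
	(void)arg;
	payload = 42;					/* write the data first  */
	__atomic_thread_fence(__ATOMIC_RELEASE);	/* ~ smp_wmb()           */
	__atomic_store_n(&ready, 1, __ATOMIC_RELAXED);	/* then publish the flag */
	return NULL;
}

static void *consumer(void *arg)
{
	(void)arg;
	while (!__atomic_load_n(&ready, __ATOMIC_RELAXED))
		;					/* wait for the flag     */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* ~ smp_rmb()           */
	printf("payload = %d\n", payload);		/* guaranteed to be 42   */
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&c, NULL, consumer, NULL);
	pthread_create(&p, NULL, producer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}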
diff --cc arch/x86/kernel/Makefile
index 50632e1,d364df0..eb07453
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@@ -21,11 -23,10 +23,11 @@@ nostackp := $(call cc-option, -fno-stac
  CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
  CFLAGS_hpet.o		:= $(nostackp)
  CFLAGS_tsc.o		:= $(nostackp)
 +CFLAGS_paravirt.o	:= $(nostackp)
- obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
- obj-y			+= traps.o irq_$(BITS).o dumpstack_$(BITS).o
- obj-y			+= time_$(BITS).o ioport.o ldt.o
+ obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
+ obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
+ obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
  obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
  obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
  obj-$(CONFIG_X86_32)	+= probe_roms_32.o
diff --cc arch/x86/mm/fault.c
index d18ea13,57ec8c8..4c056b5
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -589,10 -589,9 +590,11 @@@ void __kprobes do_page_fault(struct pt_
  	unsigned long address;
  	int write, si_code;
  	int fault;
 +	unsigned long *stackend;
 +
  #ifdef CONFIG_X86_64
  	unsigned long flags;
+ 	int sig;
  #endif
  
  	tsk = current;
diff --cc kernel/fork.c
index 99c5c65,43cbf30..913284e
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@@ -58,7 -60,7 +60,8 @@@
  #include
  #include
  #include
+ #include
 +#include
  #include
  #include
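The fault.c hunk above only shows the new stackend local being declared; in the stackprotector series it is used, as an assumption here since the using hunk is not part of this merge, to verify the magic word stored at the far end of the task's stack from the fault path. The user-space analog below shows the idea; the fake_stack array and overrun() helper are invented, while STACK_END_MAGIC's value matches the kernel's definition.

/* End-of-stack guard-word check, user-space analog. */
#include <stdio.h>
#include <string.h>

#define STACK_END_MAGIC	0x57AC6E9DUL

static unsigned long fake_stack[1024];

static void overrun(size_t n)
{
	/* n >= 1024 clobbers the guard word in the last slot */
	memset(fake_stack, 0, n * sizeof(fake_stack[0]));
}

int main(void)
{
	unsigned long *stackend = &fake_stack[1023];

	*stackend = STACK_END_MAGIC;	/* armed when the "thread" is created */
	overrun(1024);			/* simulate a stack overrun */

	if (*stackend != STACK_END_MAGIC)
		printf("Thread overran stack, or stack corrupted\n");
	return 0;
}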
panic("stack-protector: Kernel stack is corrupted in: %p\n", + __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); + #endif + + core_param(panic, panic_timeout, int, 0644); + core_param(pause_on_oops, pause_on_oops, int, 0644);