X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkernel%2Falternative.c;h=65c7857a90ddfc6ff084c6817baba045ced0ad71;hb=08b882c627aeeeb3cfd3c4354f0d360d7949549d;hp=bd72d94e713e68d75a902b725424c27bb9d39421;hpb=9a163ed8e0552fdcffe405d2ea7134819a81456e;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index bd72d94..65c7857 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #define MAX_PATCH_LEN (255-1) @@ -62,11 +65,12 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. */ -asm("\t.data\nintelnops: " +asm("\t.section .rodata, \"a\"\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -extern unsigned char intelnops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { + GENERIC_NOP7 GENERIC_NOP8 + "\t.previous"); +extern const unsigned char intelnops[]; +static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -80,11 +84,12 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.data\nk8nops: " +asm("\t.section .rodata, \"a\"\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { + K8_NOP7 K8_NOP8 + "\t.previous"); +extern const unsigned char k8nops[]; +static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -98,11 +103,12 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { #endif #ifdef K7_NOP1 -asm("\t.data\nk7nops: " +asm("\t.section .rodata, \"a\"\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); -extern unsigned char k7nops[]; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { + K7_NOP7 K7_NOP8 + "\t.previous"); +extern const unsigned char k7nops[]; +static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -115,28 +121,50 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { }; #endif +#ifdef P6_NOP1 +asm("\t.section .rodata, \"a\"\np6nops: " + P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 + P6_NOP7 P6_NOP8 + "\t.previous"); +extern const unsigned char p6nops[]; +static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { + NULL, + p6nops, + p6nops + 1, + p6nops + 1 + 2, + p6nops + 1 + 2 + 3, + p6nops + 1 + 2 + 3 + 4, + p6nops + 1 + 2 + 3 + 4 + 5, + p6nops + 1 + 2 + 3 + 4 + 5 + 6, + p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +#endif + #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline unsigned char** find_nop_table(void) +static inline const unsigned char*const * find_nop_table(void) { - return k8_nops; + return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; } #else /* CONFIG_X86_64 */ -static struct nop { +static const struct nop { int cpuid; - unsigned char **noptable; + const unsigned char *const *noptable; } noptypes[] = { { X86_FEATURE_K8, k8_nops }, { X86_FEATURE_K7, k7_nops }, + { X86_FEATURE_P4, p6_nops }, + { X86_FEATURE_P3, p6_nops }, { -1, NULL } }; -static unsigned char** find_nop_table(void) +static const unsigned char*const * find_nop_table(void) { - unsigned char **noptable = intel_nops; + const unsigned char *const *noptable = intel_nops; int i; for (i = 0; noptypes[i].cpuid >= 0; i++) { @@ -151,9 +179,9 @@ static unsigned char** find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void add_nops(void *insns, unsigned int len) +void add_nops(void *insns, unsigned int len) { - unsigned char **noptable = find_nop_table(); + const unsigned char *const *noptable = find_nop_table(); while (len > 0) { unsigned int noplen = len; @@ -164,6 +192,7 @@ static void add_nops(void *insns, unsigned int len) len -= noplen; } } +EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; @@ -179,7 +208,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); for (a = start; a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); @@ -191,13 +220,13 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); DPRINTK("%s: vsyscall fixup: %p => %p\n", - __FUNCTION__, a->instr, instr); + __func__, a->instr, instr); } #endif memcpy(insnbuf, a->replacement, a->replacementlen); add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); - text_poke(instr, insnbuf, a->instrlen); + text_poke_early(instr, insnbuf, a->instrlen); } } @@ -251,13 +280,13 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_SPINLOCK(smp_alt); +static int smp_mode = 1; /* protected by smp_alt */ void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) { struct smp_alt_module *smp; - unsigned long flags; if (noreplace_smp) return; @@ -280,74 +309,81 @@ void alternatives_smp_module_add(struct module *mod, char *name, smp->text = text; smp->text_end = text_end; DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __FUNCTION__, smp->locks, smp->locks_end, + __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - spin_lock_irqsave(&smp_alt, flags); + spin_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); if (boot_cpu_has(X86_FEATURE_UP)) alternatives_smp_unlock(smp->locks, smp->locks_end, smp->text, smp->text_end); - spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); } void alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - unsigned long flags; if (smp_alt_once || noreplace_smp) return; - spin_lock_irqsave(&smp_alt, flags); + spin_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - spin_unlock_irqrestore(&smp_alt, flags); - DPRINTK("%s: %s\n", __FUNCTION__, item->name); + spin_unlock(&smp_alt); + DPRINTK("%s: %s\n", __func__, item->name); kfree(item); return; } - spin_unlock_irqrestore(&smp_alt, flags); + spin_unlock(&smp_alt); } void alternatives_smp_switch(int smp) { struct smp_alt_module *mod; - unsigned long flags; #ifdef CONFIG_LOCKDEP /* - * A not yet fixed binutils section handling bug prevents - * alternatives-replacement from working reliably, so turn - * it off: + * Older binutils section handling bug prevented + * alternatives-replacement from working reliably. + * + * If this still occurs then you should see a hang + * or crash shortly after this line: */ - printk("lockdep: not fixing up alternatives.\n"); - return; + printk("lockdep: fixing up alternatives.\n"); #endif if (noreplace_smp || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); - spin_lock_irqsave(&smp_alt, flags); - if (smp) { + spin_lock(&smp_alt); + + /* + * Avoid unnecessary switches because it forces JIT based VMs to + * throw away all cached translations, which can be quite costly. + */ + if (smp == smp_mode) { + /* nothing */ + } else if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); } - spin_unlock_irqrestore(&smp_alt, flags); + smp_mode = smp; + spin_unlock(&smp_alt); } #endif @@ -368,14 +404,14 @@ void apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ memcpy(insnbuf, p->instr, p->len); - used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); + used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); BUG_ON(used > p->len); /* Pad the rest with nops */ add_nops(insnbuf + used, p->len - used); - text_poke(p->instr, insnbuf, p->len); + text_poke_early(p->instr, insnbuf, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -384,8 +420,6 @@ extern struct paravirt_patch_site __start_parainstructions[], void __init alternative_instructions(void) { - unsigned long flags; - /* The patching is not fully atomic, so try to avoid local interruptions that might execute the to be patched code. Other CPUs are not running. */ @@ -394,7 +428,6 @@ void __init alternative_instructions(void) stop_mce(); #endif - local_irq_save(flags); apply_alternatives(__alt_instructions, __alt_instructions_end); /* switch to patch-once-at-boottime-only mode and free the @@ -409,23 +442,28 @@ void __init alternative_instructions(void) if (smp_alt_once) { if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); } else { alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - alternatives_smp_switch(0); + + /* Only switch to UP mode if we don't immediately boot others */ + if (num_possible_cpus() == 1 || setup_max_cpus <= 1) + alternatives_smp_switch(0); } #endif apply_paravirt(__parainstructions, __parainstructions_end); - local_irq_restore(flags); + + if (smp_alt_once) + free_init_pages("SMP alternatives", + (unsigned long)__smp_locks, + (unsigned long)__smp_locks_end); restart_nmi(); #ifdef CONFIG_X86_MCE @@ -433,18 +471,70 @@ void __init alternative_instructions(void) #endif } -/* - * Warning: +/** + * text_poke_early - Update instructions on a live kernel at boot time + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * * When you use this code to patch more than one byte of an instruction * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these instructions. - * And on the local CPU you need to be protected again NMI or MCE handlers - * seeing an inconsistent instruction while you patch. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. */ -void __kprobes text_poke(void *addr, unsigned char *opcode, int len) +void *text_poke_early(void *addr, const void *opcode, size_t len) { + unsigned long flags; + local_irq_save(flags); memcpy(addr, opcode, len); + local_irq_restore(flags); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but + that causes hangs on some VIA CPUs. */ + return addr; +} + +/** + * text_poke - Update instructions on a live kernel + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * Only atomic text poke/set should be allowed when not doing early patching. + * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. + */ +void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +{ + unsigned long flags; + char *vaddr; + int nr_pages = 2; + struct page *pages[2]; + int i; + + if (!core_kernel_text((unsigned long)addr)) { + pages[0] = vmalloc_to_page(addr); + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); + } else { + pages[0] = virt_to_page(addr); + WARN_ON(!PageReserved(pages[0])); + pages[1] = virt_to_page(addr + PAGE_SIZE); + } + BUG_ON(!pages[0]); + if (!pages[1]) + nr_pages = 1; + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + BUG_ON(!vaddr); + local_irq_save(flags); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + local_irq_restore(flags); + vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); + return addr; }