X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=kernel%2Fmodule.c;h=8c6b42840dd19fa942919aa8e052bf400911a2a4;hp=b862fdb6a3723e4f82aaadb49aa10fbc2f881877;hb=254c8c2dbf0e06a560a5814eb90cb628adb2de66;hpb=e180a6b7759a99a28cbcce3547c4c80822cb6c2a diff --git a/kernel/module.c b/kernel/module.c index b862fdb..8c6b428 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include @@ -46,12 +47,17 @@ #include #include #include +#include #include #include #include #include #include #include +#include + +#define CREATE_TRACE_POINTS +#include #if 0 #define DEBUGP printk @@ -66,17 +72,30 @@ /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -/* List of modules, protected by module_mutex or preempt_disable +/* + * Mutex protects: + * 1) List of modules (also safely readable with preempt_disable), + * 2) module_use links, + * 3) module_addr_min/module_addr_max. * (delete uses stop_machine/add uses RCU list operations). */ -static DEFINE_MUTEX(module_mutex); +DEFINE_MUTEX(module_mutex); +EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + + +/* Block module loading/unloading? */ +int modules_disabled = 0; /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_text_address */ +/* Bounds of module allocation, for speeding __module_address. + * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; int register_module_notifier(struct notifier_block * nb) @@ -166,8 +185,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const unsigned long __start___kcrctab[]; extern const unsigned long __start___kcrctab_gpl[]; extern const unsigned long __start___kcrctab_gpl_future[]; @@ -186,17 +203,6 @@ extern const unsigned long __start___kcrctab_unused_gpl[]; #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif -struct symsearch { - const struct kernel_symbol *start, *stop; - const unsigned long *crcs; - enum { - NOT_GPL_ONLY, - GPL_ONLY, - WILL_BE_GPL_ONLY, - } licence; - bool unused; -}; - static bool each_symbol_in_section(const struct symsearch *arr, unsigned int arrsize, struct module *owner, @@ -217,10 +223,8 @@ static bool each_symbol_in_section(const struct symsearch *arr, } /* Returns true as soon as fn returns true, otherwise false. */ -static bool each_symbol(bool (*fn)(const struct symsearch *arr, - struct module *owner, - unsigned int symnum, void *data), - void *data) +bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, + unsigned int symnum, void *data), void *data) { struct module *mod; const struct symsearch arr[] = { @@ -273,6 +277,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr, } return false; } +EXPORT_SYMBOL_GPL(each_symbol); struct find_symbol_arg { /* Input */ @@ -283,7 +288,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; const unsigned long *crc; - unsigned long value; + const struct kernel_symbol *sym; }; static bool find_symbol_in_section(const struct symsearch *syms, @@ -324,17 +329,17 @@ static bool find_symbol_in_section(const struct symsearch *syms, fsa->owner = owner; fsa->crc = symversion(syms->crcs, symnum); - fsa->value = syms->start[symnum].value; + fsa->sym = &syms->start[symnum]; return true; } -/* Find a symbol, return value, (optional) crc and (optional) module - * which owns it */ -static unsigned long find_symbol(const char *name, - struct module **owner, - const unsigned long **crc, - bool gplok, - bool warn) +/* Find a symbol and return it, along with, (optional) crc and + * (optional) module which owns it. Needs preempt disabled or module_mutex. */ +const struct kernel_symbol *find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn) { struct find_symbol_arg fsa; @@ -347,15 +352,16 @@ static unsigned long find_symbol(const char *name, *owner = fsa.owner; if (crc) *crc = fsa.crc; - return fsa.value; + return fsa.sym; } DEBUGP("Failed to find symbol %s\n", name); - return -ENOENT; + return NULL; } +EXPORT_SYMBOL_GPL(find_symbol); /* Search for module by name: must hold module_mutex. */ -static struct module *find_module(const char *name) +struct module *find_module(const char *name) { struct module *mod; @@ -365,196 +371,102 @@ static struct module *find_module(const char *name) } return NULL; } +EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA - -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - -/* Number of blocks used and allocated. */ -static unsigned int pcpu_num_used, pcpu_num_allocated; -/* Size of each block. -ve means used. */ -static int *pcpu_size; - -static int split_block(unsigned int i, unsigned short size) +static unsigned int find_pcpusec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) { - /* Reallocation required? */ - if (pcpu_num_used + 1 > pcpu_num_allocated) { - int *new; - - new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, - GFP_KERNEL); - if (!new) - return 0; - - pcpu_num_allocated *= 2; - pcpu_size = new; - } - - /* Insert a new subblock */ - memmove(&pcpu_size[i+1], &pcpu_size[i], - sizeof(pcpu_size[0]) * (pcpu_num_used - i)); - pcpu_num_used++; - - pcpu_size[i+1] -= size; - pcpu_size[i] = size; - return 1; + return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); } -static inline unsigned int block_size(int val) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { - if (val < 0) - return -val; - return val; + int cpu; + + for_each_possible_cpu(cpu) + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) { - unsigned long extra; - unsigned int i; - void *ptr; - - if (align > PAGE_SIZE) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); - align = PAGE_SIZE; - } + struct module *mod; + unsigned int cpu; - ptr = __per_cpu_start; - for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - /* Extra for alignment requirement. */ - extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; - BUG_ON(i == 0 && extra != 0); + preempt_disable(); - if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); - /* Transfer extra to previous block. */ - if (pcpu_size[i-1] < 0) - pcpu_size[i-1] -= extra; - else - pcpu_size[i-1] += extra; - pcpu_size[i] -= extra; - ptr += extra; - - /* Split block if warranted */ - if (pcpu_size[i] - size > sizeof(unsigned long)) - if (!split_block(i, size)) - return NULL; - - /* Mark allocated */ - pcpu_size[i] = -pcpu_size[i]; - return ptr; - } - - printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", - size); - return NULL; -} - -static void percpu_modfree(void *freeme) -{ - unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); - - /* First entry is core kernel percpu data. */ - for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - if (ptr == freeme) { - pcpu_size[i] = -pcpu_size[i]; - goto free; + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } } } - BUG(); - - free: - /* Merge with previous? */ - if (pcpu_size[i-1] >= 0) { - pcpu_size[i-1] += pcpu_size[i]; - pcpu_num_used--; - memmove(&pcpu_size[i], &pcpu_size[i+1], - (pcpu_num_used - i) * sizeof(pcpu_size[0])); - i--; - } - /* Merge with next? */ - if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { - pcpu_size[i] += pcpu_size[i+1]; - pcpu_num_used--; - memmove(&pcpu_size[i+1], &pcpu_size[i+2], - (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); - } -} - -static int percpu_modinit(void) -{ - pcpu_num_used = 2; - pcpu_num_allocated = 2; - pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, - GFP_KERNEL); - /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); - /* Free room. */ - pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; - if (pcpu_size[1] < 0) { - printk(KERN_ERR "No per-cpu room for modules.\n"); - pcpu_num_used = 1; - } - - return 0; -} -__initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ - -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) -{ - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); -} - -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -{ - int cpu; - - for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + preempt_enable(); + return false; } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -562,12 +474,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ @@ -604,34 +520,34 @@ MODINFO_ATTR(srcversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD + +EXPORT_TRACEPOINT_SYMBOL(module_get); + /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { int cpu; - INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + INIT_LIST_HEAD(&mod->source_list); + INIT_LIST_HEAD(&mod->target_list); + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } -/* modules using other modules */ -struct module_use -{ - struct list_head list; - struct module *module_which_uses; -}; - /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { struct module_use *use; - list_for_each_entry(use, &b->modules_which_use_me, list) { - if (use->module_which_uses == a) { + list_for_each_entry(use, &b->source_list, source_list) { + if (use->source == a) { DEBUGP("%s uses %s!\n", a->name, b->name); return 1; } @@ -640,61 +556,68 @@ static int already_uses(struct module *a, struct module *b) return 0; } -/* Module a uses b */ -static int use_module(struct module *a, struct module *b) +/* + * Module a uses b + * - we add 'a' as a "source", 'b' as a "target" of module use + * - the module_use is added to the list of 'b' sources (so + * 'b' can walk the list to see who sourced them), and of 'a' + * targets (so 'a' can see what modules it targets). + */ +static int add_module_usage(struct module *a, struct module *b) { struct module_use *use; - int no_warn, err; - if (b == NULL || already_uses(a, b)) return 1; + DEBUGP("Allocating new usage for %s.\n", a->name); + use = kmalloc(sizeof(*use), GFP_ATOMIC); + if (!use) { + printk(KERN_WARNING "%s: out of memory loading\n", a->name); + return -ENOMEM; + } + + use->source = a; + use->target = b; + list_add(&use->source_list, &b->source_list); + list_add(&use->target_list, &a->target_list); + return 0; +} + +/* Module a uses b: caller needs module_mutex() */ +int ref_module(struct module *a, struct module *b) +{ + int err; - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If module isn't available, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; - DEBUGP("Allocating new usage for %s.\n", a->name); - use = kmalloc(sizeof(*use), GFP_ATOMIC); - if (!use) { - printk("%s: out of memory loading\n", a->name); + err = add_module_usage(a, b); + if (err) { module_put(b); - return 0; + return err; } - - use->module_which_uses = a; - list_add(&use->list, &b->modules_which_use_me); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 1; + return 0; } +EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) { - struct module *i; - - list_for_each_entry(i, &modules, list) { - struct module_use *use; + struct module_use *use, *tmp; - list_for_each_entry(use, &i->modules_which_use_me, list) { - if (use->module_which_uses == mod) { - DEBUGP("%s unusing %s\n", mod->name, i->name); - module_put(i); - list_del(&use->list); - kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); - /* There can be at most one match. */ - break; - } - } + mutex_lock(&module_mutex); + list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { + struct module *i = use->target; + DEBUGP("%s unusing %s\n", mod->name, i->name); + module_put(i); + list_del(&use->source_list); + list_del(&use->target_list); + kfree(use); } + mutex_unlock(&module_mutex); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -751,12 +674,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -785,23 +724,15 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, char name[MODULE_NAME_LEN]; int ret, forced = 0; - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - /* Create stop_machine threads since free_module relies on - * a non-failing stop_machine call. */ - ret = stop_machine_create(); - if (ret) - return ret; - - if (mutex_lock_interruptible(&module_mutex) != 0) { - ret = -EINTR; - goto out_stop; - } + if (mutex_lock_interruptible(&module_mutex) != 0) + return -EINTR; mod = find_module(name); if (!mod) { @@ -809,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, goto out; } - if (!list_empty(&mod->modules_which_use_me)) { + if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; @@ -853,16 +784,15 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); async_synchronize_full(); - mutex_lock(&module_mutex); + /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); ddebug_remove_module(mod->name); - free_module(mod); - out: + free_module(mod); + return 0; +out: mutex_unlock(&module_mutex); -out_stop: - stop_machine_destroy(); return ret; } @@ -875,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) /* Always include a trailing , so userspace can differentiate between this and the old multi-field proc format. */ - list_for_each_entry(use, &mod->modules_which_use_me, list) { + list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; - seq_printf(m, "%s,", use->module_which_uses->name); + seq_printf(m, "%s,", use->source->name); } if (mod->init != NULL && mod->exit == NULL) { @@ -894,22 +824,26 @@ void __symbol_put(const char *symbol) struct module *owner; preempt_disable(); - if (IS_ERR_VALUE(find_symbol(symbol, &owner, NULL, true, false))) + if (!find_symbol(symbol, &owner, NULL, true, false)) BUG(); module_put(owner); preempt_enable(); } EXPORT_SYMBOL(__symbol_put); +/* Note this assumes addr is a function, which it currently always is. */ void symbol_put_addr(void *addr) { struct module *modaddr; + unsigned long a = (unsigned long)dereference_function_descriptor(addr); - if (core_kernel_text((unsigned long)addr)) + if (core_kernel_text(a)) return; - if (!(modaddr = module_text_address((unsigned long)addr))) - BUG(); + /* module_text_address is safe here: we're supposed to have reference + * to module from symbol_get, so it can't go away. */ + modaddr = __module_text_address(a); + BUG_ON(!modaddr); module_put(modaddr); } EXPORT_SYMBOL_GPL(symbol_put_addr); @@ -928,12 +862,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); + + trace_module_put(module, _RET_IP_); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -949,10 +886,11 @@ static inline void module_unload_free(struct module *mod) { } -static inline int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } +EXPORT_SYMBOL_GPL(ref_module); static inline void module_unload_init(struct module *mod) { @@ -995,12 +933,12 @@ static struct module_attribute *modinfo_attrs[] = { static const char vermagic[] = VERMAGIC_STRING; -static int try_to_force_load(struct module *mod, const char *symname) +static int try_to_force_load(struct module *mod, const char *reason) { #ifdef CONFIG_MODULE_FORCE_LOAD if (!test_taint(TAINT_FORCED_MODULE)) - printk("%s: no version for \"%s\" found: kernel tainted.\n", - mod->name, symname); + printk(KERN_WARNING "%s: %s: kernel tainted.\n", + mod->name, reason); add_taint_module(mod, TAINT_FORCED_MODULE); return 0; #else @@ -1009,11 +947,23 @@ static int try_to_force_load(struct module *mod, const char *symname) } #ifdef CONFIG_MODVERSIONS +/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */ +static unsigned long maybe_relocated(unsigned long crc, + const struct module *crc_owner) +{ +#ifdef ARCH_RELOCATES_KCRCTAB + if (crc_owner == NULL) + return crc - (unsigned long)reloc_start; +#endif + return crc; +} + static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { unsigned int i, num_versions; struct modversion_info *versions; @@ -1034,10 +984,10 @@ static int check_version(Elf_Shdr *sechdrs, if (strcmp(versions[i].name, symname) != 0) continue; - if (versions[i].crc == *crc) + if (versions[i].crc == maybe_relocated(*crc, crc_owner)) return 1; DEBUGP("Found checksum %lX vs module %lX\n", - *crc, versions[i].crc); + maybe_relocated(*crc, crc_owner), versions[i].crc); goto bad_version; } @@ -1057,9 +1007,13 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; - if (IS_ERR_VALUE(find_symbol("struct_module", NULL, &crc, true, false))) + /* Since this should be found in kernel (which can't be removed), + * no locking is necessary. */ + if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, + &crc, true, false)) BUG(); - return check_version(sechdrs, versindex, "struct_module", mod, crc); + return check_version(sechdrs, versindex, "module_layout", mod, crc, + NULL); } /* First part is kernel version, which we ignore if module has crcs. */ @@ -1077,7 +1031,8 @@ static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { return 1; } @@ -1096,27 +1051,60 @@ static inline int same_magic(const char *amagic, const char *bmagic, } #endif /* CONFIG_MODVERSIONS */ -/* Resolve a symbol for this module. I.e. if we find one, record usage. - Must be holding module_mutex. */ -static unsigned long resolve_symbol(Elf_Shdr *sechdrs, - unsigned int versindex, - const char *name, - struct module *mod) +/* Resolve a symbol for this module. I.e. if we find one, record usage. */ +static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, + unsigned int versindex, + const char *name, + struct module *mod, + char ownername[]) { struct module *owner; - unsigned long ret; + const struct kernel_symbol *sym; const unsigned long *crc; + int err; - ret = find_symbol(name, &owner, &crc, + mutex_lock(&module_mutex); + sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - if (!IS_ERR_VALUE(ret)) { - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (!check_version(sechdrs, versindex, name, mod, crc) || - !use_module(mod, owner)) - ret = -EINVAL; + if (!sym) + goto unlock; + + if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { + sym = ERR_PTR(-EINVAL); + goto getname; } - return ret; + + err = ref_module(mod, owner); + if (err) { + sym = ERR_PTR(err); + goto getname; + } + +getname: + /* We must make copy under the lock if we failed to get ref. */ + strncpy(ownername, module_name(owner), MODULE_NAME_LEN); +unlock: + mutex_unlock(&module_mutex); + return sym; +} + +static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, + unsigned int versindex, + const char *name, + struct module *mod) +{ + const struct kernel_symbol *ksym; + char ownername[MODULE_NAME_LEN]; + + if (wait_event_interruptible_timeout(module_wq, + !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, + mod, ownername)) || + PTR_ERR(ksym) != -EBUSY, + 30 * HZ) <= 0) { + printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", + mod->name, ownername); + } + return ksym; } /* @@ -1124,6 +1112,12 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, * J. Corbet */ #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) + +static inline bool sect_empty(const Elf_Shdr *sect) +{ + return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; +} + struct module_sect_attr { struct module_attribute mattr; @@ -1165,7 +1159,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + if (!sect_empty(&sechdrs[i])) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1183,7 +1177,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; for (i = 0; i < nsect; i++) { - if (! (sechdrs[i].sh_flags & SHF_ALLOC)) + if (sect_empty(&sechdrs[i])) continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, @@ -1191,6 +1185,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, if (sattr->name == NULL) goto out; sect_attrs->nsections++; + sysfs_attr_init(&sattr->mattr.attr); sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; @@ -1230,7 +1225,7 @@ struct module_notes_attrs { struct bin_attribute attrs[0]; }; -static ssize_t module_notes_read(struct kobject *kobj, +static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { @@ -1260,10 +1255,14 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, struct module_notes_attrs *notes_attrs; struct bin_attribute *nattr; + /* failed to create section attributes, so can't create notes */ + if (!mod->sect_attrs) + return; + /* Count notes sections and allocate structures. */ notes = 0; for (i = 0; i < nsect; i++) - if ((sechdrs[i].sh_flags & SHF_ALLOC) && + if (!sect_empty(&sechdrs[i]) && (sechdrs[i].sh_type == SHT_NOTE)) ++notes; @@ -1279,9 +1278,10 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, notes_attrs->notes = notes; nattr = ¬es_attrs->attrs[0]; for (loaded = i = 0; i < nsect; ++i) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + if (sect_empty(&sechdrs[i])) continue; if (sechdrs[i].sh_type == SHT_NOTE) { + sysfs_bin_attr_init(nattr); nattr->attr.name = mod->sect_attrs->attrs[loaded].name; nattr->attr.mode = S_IRUGO; nattr->size = sechdrs[i].sh_size; @@ -1336,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod) #endif #ifdef CONFIG_SYSFS -int module_add_modinfo_attrs(struct module *mod) +static void add_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + int nowarn; + + mutex_lock(&module_mutex); + list_for_each_entry(use, &mod->target_list, target_list) { + nowarn = sysfs_create_link(use->target->holders_dir, + &mod->mkobj.kobj, mod->name); + } + mutex_unlock(&module_mutex); +#endif +} + +static void del_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + + mutex_lock(&module_mutex); + list_for_each_entry(use, &mod->target_list, target_list) + sysfs_remove_link(use->target->holders_dir, mod->name); + mutex_unlock(&module_mutex); +#endif +} + +static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; struct module_attribute *temp_attr; @@ -1354,6 +1381,7 @@ int module_add_modinfo_attrs(struct module *mod) if (!attr->test || (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); + sysfs_attr_init(&temp_attr->attr); error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); ++temp_attr; } @@ -1361,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod) return error; } -void module_remove_modinfo_attrs(struct module *mod) +static void module_remove_modinfo_attrs(struct module *mod) { struct module_attribute *attr; int i; @@ -1377,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod) kfree(mod->modinfo_attrs); } -int mod_sysfs_init(struct module *mod) +static int mod_sysfs_init(struct module *mod) { int err; struct kobject *kobj; @@ -1411,12 +1439,16 @@ out: return err; } -int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, struct kernel_param *kparam, unsigned int num_params) { int err; + err = mod_sysfs_init(mod); + if (err) + goto out; + mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); if (!mod->holders_dir) { err = -ENOMEM; @@ -1431,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_param; + add_usage_links(mod); + kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; @@ -1440,6 +1474,7 @@ out_unreg_holders: kobject_put(mod->holders_dir); out_unreg: kobject_put(&mod->mkobj.kobj); +out: return err; } @@ -1450,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod) #else /* CONFIG_SYSFS */ +static inline int mod_sysfs_init(struct module *mod) +{ + return 0; +} + +static inline int mod_sysfs_setup(struct module *mod, + struct kernel_param *kparam, + unsigned int num_params) +{ + return 0; +} + +static inline int module_add_modinfo_attrs(struct module *mod) +{ + return 0; +} + +static inline void module_remove_modinfo_attrs(struct module *mod) +{ +} + static void mod_sysfs_fini(struct module *mod) { } +static void del_usage_links(struct module *mod) +{ +} + #endif /* CONFIG_SYSFS */ static void mod_kobject_remove(struct module *mod) { + del_usage_links(mod); module_remove_modinfo_attrs(mod); module_param_sysfs_remove(mod); kobject_put(mod->mkobj.drivers_dir); @@ -1476,11 +1537,15 @@ static int __unlink_module(void *_mod) return 0; } -/* Free a module, remove from lists, etc (must hold module_mutex). */ +/* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { + trace_module_free(mod); + /* Delete from various lists */ + mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); + mutex_unlock(&module_mutex); remove_notes_attrs(mod); remove_sect_attrs(mod); mod_kobject_remove(mod); @@ -1494,45 +1559,45 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); - /* release any pointers to mcount in this module */ - ftrace_release(mod->module_core, mod->core_size); - /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod); +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); + +#ifdef CONFIG_MPU + update_protections(current->mm); +#endif } void *__symbol_get(const char *symbol) { struct module *owner; - unsigned long value; + const struct kernel_symbol *sym; preempt_disable(); - value = find_symbol(symbol, &owner, NULL, true, true); - if (IS_ERR_VALUE(value)) - value = 0; - else if (strong_try_module_get(owner)) - value = 0; + sym = find_symbol(symbol, &owner, NULL, true, true); + if (sym && strong_try_module_get(owner)) + sym = NULL; preempt_enable(); - return (void *)value; + return sym ? (void *)sym->value : NULL; } EXPORT_SYMBOL_GPL(__symbol_get); /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. + * + * You must hold the module_mutex. */ static int verify_export_symbols(struct module *mod) { @@ -1554,8 +1619,7 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (!IS_ERR_VALUE(find_symbol(s->name, &owner, - NULL, true, false))) { + if (find_symbol(s->name, &owner, NULL, true, false)) { printk(KERN_ERR "%s: exports duplicate symbol %s" " (owned by %s)\n", @@ -1579,6 +1643,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, unsigned long secbase; unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); int ret = 0; + const struct kernel_symbol *ksym; for (i = 1; i < n; i++) { switch (sym[i].st_shndx) { @@ -1598,26 +1663,29 @@ static int simplify_symbols(Elf_Shdr *sechdrs, break; case SHN_UNDEF: - sym[i].st_value - = resolve_symbol(sechdrs, versindex, - strtab + sym[i].st_name, mod); - + ksym = resolve_symbol_wait(sechdrs, versindex, + strtab + sym[i].st_name, + mod); /* Ok if resolved. */ - if (!IS_ERR_VALUE(sym[i].st_value)) + if (ksym && !IS_ERR(ksym)) { + sym[i].st_value = ksym->value; break; + } + /* Ok if weak. */ - if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) break; - printk(KERN_WARNING "%s: Unknown symbol %s\n", - mod->name, strtab + sym[i].st_name); - ret = -ENOENT; + printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", + mod->name, strtab + sym[i].st_name, + PTR_ERR(ksym)); + ret = PTR_ERR(ksym) ?: -ENOENT; break; default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1679,8 +1747,7 @@ static void layout_sections(struct module *mod, if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || strncmp(secstrings + s->sh_name, - ".init", 5) == 0) + || strstarts(secstrings + s->sh_name, ".init")) continue; s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", secstrings + s->sh_name); @@ -1697,8 +1764,7 @@ static void layout_sections(struct module *mod, if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || strncmp(secstrings + s->sh_name, - ".init", 5) != 0) + || !strstarts(secstrings + s->sh_name, ".init")) continue; s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK); @@ -1771,6 +1837,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, } } +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); + } +} + #ifdef CONFIG_KALLSYMS /* lookup symbol in given range of kernel_symbols */ @@ -1831,19 +1908,98 @@ static char elf_type(const Elf_Sym *sym, else return 'b'; } - if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, - ".debug", strlen(".debug")) == 0) + if (strstarts(secstrings + sechdrs[sym->st_shndx].sh_name, ".debug")) return 'n'; return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Ehdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; + const Elf_Sym *src; + const char *strtab; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; + ++ndst; + } + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1853,13 +2009,46 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); + ++ndst; + } + mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Ehdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ + return 0; +} + static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -1878,15 +2067,44 @@ static void *module_alloc_update_bounds(unsigned long size) void *ret = module_alloc(size); if (ret) { + mutex_lock(&module_mutex); /* Update module bounds. */ if ((unsigned long)ret < module_addr_min) module_addr_min = (unsigned long)ret; if ((unsigned long)ret + size > module_addr_max) module_addr_max = (unsigned long)ret + size; + mutex_unlock(&module_mutex); } return ret; } +#ifdef CONFIG_DEBUG_KMEMLEAK +static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, char *secstrings) +{ + unsigned int i; + + /* only scan the sections containing data */ + kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); + + for (i = 1; i < hdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0 + && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) + continue; + + kmemleak_scan_area((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size, GFP_KERNEL); + } +} +#else +static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, char *secstrings) +{ +} +#endif + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -1901,11 +2119,12 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int num_mcount; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - unsigned long *mseg; + void *ptr = NULL; /* Stops spurious gcc warning */ + unsigned long symoffs, stroffs, *strmap; + void __percpu *percpu; + mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -1918,12 +2137,6 @@ static noinline struct module *load_module(void __user *umod, if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) return ERR_PTR(-ENOMEM); - /* Create stop_machine threads since the error path relies on - * a non-failing stop_machine call. */ - err = stop_machine_create(); - if (err) - goto free_hdr; - if (copy_from_user(hdr, umod, len) != 0) { err = -EFAULT; goto free_hdr; @@ -1964,7 +2177,7 @@ static noinline struct module *load_module(void __user *umod, } #ifndef CONFIG_MODULE_UNLOAD /* Don't load .exit sections */ - if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) + if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; #endif } @@ -1993,11 +2206,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2008,7 +2216,7 @@ static noinline struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - err = try_to_force_load(mod, "magic"); + err = try_to_force_load(mod, "bad vermagic"); if (err) goto free_hdr; } else if (!same_magic(modmagic, vermagic, versindex)) { @@ -2033,8 +2241,10 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } - if (find_module(mod->name)) { - err = -EEXIST; + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; goto free_mod; } @@ -2047,24 +2257,30 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } + /* Keep this around for failure path. */ + percpu = mod_percpu(mod); /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. Just mark it as not being a + * leak. + */ + kmemleak_not_leak(ptr); if (!ptr) { err = -ENOMEM; goto free_percpu; @@ -2073,6 +2289,13 @@ static noinline struct module *load_module(void __user *umod, mod->module_core = ptr; ptr = module_alloc_update_bounds(mod->init_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. This block doesn't need to be + * scanned as it contains data and code that will be freed + * after the module is initialized. + */ + kmemleak_ignore(ptr); if (!ptr && mod->init_size) { err = -ENOMEM; goto free_core; @@ -2103,10 +2326,10 @@ static noinline struct module *load_module(void __user *umod, } /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; + kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2115,11 +2338,6 @@ static noinline struct module *load_module(void __user *umod, /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); - /* add kobject, so we can reference it. */ - err = mod_sysfs_init(mod); - if (err) - goto free_unload; - /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); @@ -2176,18 +2394,36 @@ static noinline struct module *load_module(void __user *umod, mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); #endif - -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); +#ifdef CONFIG_CONSTRUCTORS + mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", + sizeof(*mod->ctors), &mod->num_ctors); #endif + #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif - +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(hdr, sechdrs, secstrings, + "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * + mod->num_trace_events, GFP_KERNEL); +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) @@ -2197,8 +2433,8 @@ static noinline struct module *load_module(void __user *umod, || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) #endif ) { - printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name); - err = try_to_force_load(mod, "nocrc"); + err = try_to_force_load(mod, + "no versions for exported symbols"); if (err) goto cleanup; } @@ -2226,21 +2462,19 @@ static noinline struct module *load_module(void __user *umod, goto cleanup; } - /* Find duplicate symbols */ - err = verify_export_symbols(mod); - if (err < 0) - goto cleanup; - /* Set up and sort exception table */ mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2252,11 +2486,6 @@ static noinline struct module *load_module(void __user *umod, dynamic_debug_setup(debug, num_debug); } - /* sechdrs[0].sh_size is always zero */ - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", - sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); - err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2291,7 +2520,19 @@ static noinline struct module *load_module(void __user *umod, * function to insert in a way safe to concurrent readers. * The mutex protects against concurrent writers. */ + mutex_lock(&module_mutex); + if (find_module(mod->name)) { + err = -EEXIST; + goto unlock; + } + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); + if (err < 0) + goto unlock; + list_add_rcu(&mod->list, &modules); + mutex_unlock(&module_mutex); err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); if (err < 0) @@ -2300,41 +2541,44 @@ static noinline struct module *load_module(void __user *umod, err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); if (err < 0) goto unlink; + add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); /* Get rid of temporary copy */ vfree(hdr); - stop_machine_destroy(); + trace_module_load(mod); + /* Done! */ return mod; unlink: - stop_machine(__unlink_module, mod, NULL); + mutex_lock(&module_mutex); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + unlock: + mutex_unlock(&module_mutex); + synchronize_sched(); module_arch_cleanup(mod); cleanup: - kobject_del(&mod->mkobj.kobj); - kobject_put(&mod->mkobj.kobj); - ftrace_release(mod->module_core, mod->core_size); - free_unload: + free_modinfo(mod); module_unload_free(mod); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); #endif module_free(mod, mod->module_init); free_core: module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + free_percpu(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); - stop_machine_destroy(); return ERR_PTR(err); truncated: @@ -2343,6 +2587,17 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } +/* Call module constructors. */ +static void do_mod_ctors(struct module *mod) +{ +#ifdef CONFIG_CONSTRUCTORS + unsigned long i; + + for (i = 0; i < mod->num_ctors; i++) + mod->ctors[i](); +#endif +} + /* This is where the real work happens */ SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs) @@ -2351,26 +2606,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, int ret = 0; /* Must have permission */ - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; - /* Only one module load at a time, please */ - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; - /* Do all the hard work */ mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) { - mutex_unlock(&module_mutex); + if (IS_ERR(mod)) return PTR_ERR(mod); - } - - /* Drop lock so they can recurse */ - mutex_unlock(&module_mutex); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); + do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) ret = do_one_initcall(mod->init); @@ -2382,16 +2629,14 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); - mutex_lock(&module_mutex); free_module(mod); - mutex_unlock(&module_mutex); wake_up(&module_wq); return ret; } if (ret > 0) { - printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " - "it should follow 0/-E convention\n" - KERN_WARNING "%s: loading module anyway...\n", + printk(KERN_WARNING +"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" +"%s: loading module anyway...\n", __func__, mod->name, ret, __func__); dump_stack(); @@ -2403,9 +2648,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); + /* We need to finish all async code before the module init sequence is done */ + async_synchronize_full(); + mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); + trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; @@ -2611,6 +2865,25 @@ unsigned long module_kallsyms_lookup_name(const char *name) preempt_enable(); return ret; } + +int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) +{ + struct module *mod; + unsigned int i; + int ret; + + list_for_each_entry(mod, &modules, list) { + for (i = 0; i < mod->num_symtab; i++) { + ret = fn(data, mod->strtab + mod->symtab[i].st_name, + mod, mod->symtab[i].st_value); + if (ret != 0) + return ret; + } + } + return 0; +} #endif /* CONFIG_KALLSYMS */ static char *module_flags(struct module *mod, char *buf) @@ -2746,29 +3019,31 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) } /* - * Is this a valid module address? + * is_module_address - is this address inside a module? + * @addr: the address to check. + * + * See is_module_text_address() if you simply want to see if the address + * is code (not data). */ -int is_module_address(unsigned long addr) +bool is_module_address(unsigned long addr) { - struct module *mod; + bool ret; preempt_disable(); - - list_for_each_entry_rcu(mod, &modules, list) { - if (within_module_core(addr, mod)) { - preempt_enable(); - return 1; - } - } - + ret = __module_address(addr) != NULL; preempt_enable(); - return 0; + return ret; } - -/* Is this a valid kernel address? */ -__notrace_funcgraph struct module *__module_text_address(unsigned long addr) +/* + * __module_address - get the module which contains an address. + * @addr: the address. + * + * Must be called with preempt disabled or module mutex held so that + * module doesn't get freed during this. + */ +struct module *__module_address(unsigned long addr) { struct module *mod; @@ -2776,22 +3051,51 @@ __notrace_funcgraph struct module *__module_text_address(unsigned long addr) return NULL; list_for_each_entry_rcu(mod, &modules, list) - if (within(addr, mod->module_init, mod->init_text_size) - || within(addr, mod->module_core, mod->core_text_size)) + if (within_module_core(addr, mod) + || within_module_init(addr, mod)) return mod; return NULL; } +EXPORT_SYMBOL_GPL(__module_address); -struct module *module_text_address(unsigned long addr) +/* + * is_module_text_address - is this address inside module code? + * @addr: the address to check. + * + * See is_module_address() if you simply want to see if the address is + * anywhere in a module. See kernel_text_address() for testing if an + * address corresponds to kernel or module code. + */ +bool is_module_text_address(unsigned long addr) { - struct module *mod; + bool ret; preempt_disable(); - mod = __module_text_address(addr); + ret = __module_text_address(addr) != NULL; preempt_enable(); + return ret; +} + +/* + * __module_text_address - get the module whose code contains an address. + * @addr: the address. + * + * Must be called with preempt disabled or module mutex held so that + * module doesn't get freed during this. + */ +struct module *__module_text_address(unsigned long addr) +{ + struct module *mod = __module_address(addr); + if (mod) { + /* Make sure it's within the text section. */ + if (!within(addr, mod->module_init, mod->init_text_size) + && !within(addr, mod->module_core, mod->core_text_size)) + mod = NULL; + } return mod; } +EXPORT_SYMBOL_GPL(__module_text_address); /* Don't grab lock, we're oopsing. */ void print_modules(void) @@ -2799,7 +3103,7 @@ void print_modules(void) struct module *mod; char buf[8]; - printk("Modules linked in:"); + printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) @@ -2811,23 +3115,16 @@ void print_modules(void) } #ifdef CONFIG_MODVERSIONS -/* Generate the signature for struct module here, too, for modversions. */ -void struct_module(struct module *mod) { return; } -EXPORT_SYMBOL(struct_module); -#endif - -#ifdef CONFIG_MARKERS -void module_update_markers(void) +/* Generate the signature for all relevant module structures here. + * If these change, we don't want to try to parse the module. */ +void module_layout(struct module *mod, + struct modversion_info *ver, + struct kernel_param *kp, + struct kernel_symbol *ks, + struct tracepoint *tp) { - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); } +EXPORT_SYMBOL(module_layout); #endif #ifdef CONFIG_TRACEPOINTS