X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=kernel%2Fhw_breakpoint.c;h=7a56b22e0602f0b37338c189110d65c07e7cc730;hb=fc2362afd5ab9456caab4de317da796cc88944fe;hp=c16662268d754d81e74670f3ab1f27122bd6557c;hpb=ba6909b719a5ccc0c8100d2895bb7ff557b2eeae;p=safe%2Fjmp%2Flinux-2.6 diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c166622..7a56b22 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,22 +40,29 @@ #include #include #include +#include +#include #include #include + /* * Constraints data */ /* Number of pinned cpu breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]); /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); +static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]); /* Number of non-pinned cpu/task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); + +static int nr_slots[TYPE_MAX]; + +static int constraints_initialized; /* Gather the number of total pinned and un-pinned bp in a cpuset */ struct bp_busy_slots { @@ -66,16 +73,29 @@ struct bp_busy_slots { /* Serialize accesses to the above constraints */ static DEFINE_MUTEX(nr_bp_mutex); +__weak int hw_breakpoint_weight(struct perf_event *bp) +{ + return 1; +} + +static inline enum bp_type_idx find_slot_idx(struct perf_event *bp) +{ + if (bp->attr.bp_type & HW_BREAKPOINT_RW) + return TYPE_DATA; + + return TYPE_INST; +} + /* * Report the maximum number of pinned breakpoints a task * have in this cpu */ -static unsigned int max_task_bp_pinned(int cpu) +static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) { int i; - unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); + unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); - for (i = HBP_NUM -1; i >= 0; i--) { + for (i = nr_slots[type] - 1; i >= 0; i--) { if (tsk_pinned[i] > 0) return i + 1; } @@ -83,16 +103,54 @@ static unsigned int max_task_bp_pinned(int cpu) return 0; } +static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type) +{ + struct perf_event_context *ctx = tsk->perf_event_ctxp; + struct list_head *list; + struct perf_event *bp; + unsigned long flags; + int count = 0; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return 0; + + list = &ctx->event_list; + + raw_spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the open() callback time + */ + list_for_each_entry(bp, list, event_entry) { + if (bp->attr.type == PERF_TYPE_BREAKPOINT) + if (find_slot_idx(bp) == type) + count += hw_breakpoint_weight(bp); + } + + raw_spin_unlock_irqrestore(&ctx->lock, flags); + + return count; +} + /* * Report the number of pinned/un-pinned breakpoints we have in * a given cpu (cpu > -1) or in all of them (cpu = -1). */ -static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) +static void +fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, + enum bp_type_idx type) { + int cpu = bp->cpu; + struct task_struct *tsk = bp->ctx->task; + if (cpu >= 0) { - slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); - slots->pinned += max_task_bp_pinned(cpu); - slots->flexible = per_cpu(nr_bp_flexible, cpu); + slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); + if (!tsk) + slots->pinned += max_task_bp_pinned(cpu, type); + else + slots->pinned += task_bp_pinned(tsk, type); + slots->flexible = per_cpu(nr_bp_flexible[type], cpu); return; } @@ -100,13 +158,16 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) for_each_online_cpu(cpu) { unsigned int nr; - nr = per_cpu(nr_cpu_bp_pinned, cpu); - nr += max_task_bp_pinned(cpu); + nr = per_cpu(nr_cpu_bp_pinned[type], cpu); + if (!tsk) + nr += max_task_bp_pinned(cpu, type); + else + nr += task_bp_pinned(tsk, type); if (nr > slots->pinned) slots->pinned = nr; - nr = per_cpu(nr_bp_flexible, cpu); + nr = per_cpu(nr_bp_flexible[type], cpu); if (nr > slots->flexible) slots->flexible = nr; @@ -114,54 +175,49 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) } /* - * Add a pinned breakpoint for the given task in our constraint table + * For now, continue to consider flexible as pinned, until we can + * ensure no flexible event can ever be scheduled before a pinned event + * in a same cpu. */ -static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +static void +fetch_this_slot(struct bp_busy_slots *slots, int weight) { - int count = 0; - struct perf_event *bp; - struct perf_event_context *ctx = tsk->perf_event_ctxp; - unsigned int *task_bp_pinned; - struct list_head *list; - unsigned long flags; - - if (WARN_ONCE(!ctx, "No perf context for this task")) - return; - - list = &ctx->event_list; - - spin_lock_irqsave(&ctx->lock, flags); - - /* - * The current breakpoint counter is not included in the list - * at the open() callback time - */ - list_for_each_entry(bp, list, event_entry) { - if (bp->attr.type == PERF_TYPE_BREAKPOINT) - count++; - } + slots->pinned += weight; +} - spin_unlock_irqrestore(&ctx->lock, flags); +/* + * Add a pinned breakpoint for the given task in our constraint table + */ +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, + enum bp_type_idx type, int weight) +{ + unsigned int *tsk_pinned; + int old_count = 0; + int old_idx = 0; + int idx = 0; - if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) - return; + old_count = task_bp_pinned(tsk, type); + old_idx = old_count - 1; + idx = old_idx + weight; - task_bp_pinned = per_cpu(task_bp_pinned, cpu); + tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); if (enable) { - task_bp_pinned[count]++; - if (count > 0) - task_bp_pinned[count-1]--; + tsk_pinned[idx]++; + if (old_count > 0) + tsk_pinned[old_idx]--; } else { - task_bp_pinned[count]--; - if (count > 0) - task_bp_pinned[count-1]++; + tsk_pinned[idx]--; + if (old_count > 0) + tsk_pinned[old_idx]++; } } /* * Add/remove the given breakpoint in our constraint table */ -static void toggle_bp_slot(struct perf_event *bp, bool enable) +static void +toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, + int weight) { int cpu = bp->cpu; struct task_struct *tsk = bp->ctx->task; @@ -169,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) /* Pinned counter task profiling */ if (tsk) { if (cpu >= 0) { - toggle_bp_task_slot(tsk, cpu, enable); + toggle_bp_task_slot(tsk, cpu, enable, type, weight); return; } for_each_online_cpu(cpu) - toggle_bp_task_slot(tsk, cpu, enable); + toggle_bp_task_slot(tsk, cpu, enable, type, weight); return; } /* Pinned counter cpu profiling */ if (enable) - per_cpu(nr_cpu_bp_pinned, bp->cpu)++; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; else - per_cpu(nr_cpu_bp_pinned, bp->cpu)--; + per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight; } /* @@ -193,7 +249,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to a single cpu, check: * * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM * * -> If there are already non-pinned counters in this cpu, it means * there is already a free slot for them. @@ -204,7 +260,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to every cpus, check: * * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM * * -> This is roughly the same, except we check the number of per cpu * bp for every cpu and we keep the max one. Same for the per tasks @@ -216,7 +272,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to a single cpu, check: * * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM * * -> Same checks as before. But now the nr_bp_flexible, if any, must keep * one register at least (or they will never be fed). @@ -224,156 +280,188 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to every cpus, check: * * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; + enum bp_type_idx type; + int weight; - mutex_lock(&nr_bp_mutex); + /* We couldn't initialize breakpoint constraints on boot */ + if (!constraints_initialized) + return -ENOMEM; + + /* Basic checks */ + if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY || + bp->attr.bp_type == HW_BREAKPOINT_INVALID) + return -EINVAL; + + type = find_slot_idx(bp); + weight = hw_breakpoint_weight(bp); - fetch_bp_busy_slots(&slots, bp->cpu); + fetch_bp_busy_slots(&slots, bp, type); + fetch_this_slot(&slots, weight); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if (slots.pinned + (!!slots.flexible) > nr_slots[type]) + return -ENOSPC; - toggle_bp_slot(bp, true); + toggle_bp_slot(bp, true, type, weight); + + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); -end: mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + enum bp_type_idx type; + int weight; + + type = find_slot_idx(bp); + weight = hw_breakpoint_weight(bp); + toggle_bp_slot(bp, false, type, weight); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} -int __register_perf_hw_breakpoint(struct perf_event *bp) +static int validate_hw_breakpoint(struct perf_event *bp) { int ret; - ret = reserve_bp_slot(bp); + ret = arch_validate_hwbkpt_settings(bp); if (ret) return ret; - if (!bp->attr.disabled) - ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + if (arch_check_bp_in_kernelspace(bp)) { + if (bp->attr.exclude_kernel) + return -EINVAL; + /* + * Don't let unprivileged users set a breakpoint in the trap + * path to avoid trap recursion attacks. + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } - return ret; + return 0; } int register_perf_hw_breakpoint(struct perf_event *bp) { - bp->callback = perf_bp_event; - - return __register_perf_hw_breakpoint(bp); -} - -/* - * Register a breakpoint bound to a task and a given cpu. - * If cpu is -1, the breakpoint is active for the task in every cpu - * If the task is -1, the breakpoint is active for every tasks in the given - * cpu. - */ -static struct perf_event * -register_user_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - pid_t pid, - int cpu, - bool active) -{ - struct perf_event_attr *attr; - struct perf_event *bp; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) - return ERR_PTR(-ENOMEM); + int ret; - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(*attr); - attr->bp_addr = addr; - attr->bp_len = len; - attr->bp_type = type; - /* - * Such breakpoints are used by debuggers to trigger signals when - * we hit the excepted memory op. We can't miss such events, they - * must be pinned. - */ - attr->pinned = 1; + ret = reserve_bp_slot(bp); + if (ret) + return ret; - if (!active) - attr->disabled = 1; + ret = validate_hw_breakpoint(bp); - bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); - kfree(attr); + /* if arch_validate_hwbkpt_settings() fails then release bp slot */ + if (ret) + release_bp_slot(bp); - return bp; + return ret; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - * */ struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) +register_user_hw_breakpoint(struct perf_event_attr *attr, + perf_overflow_handler_t triggered, + struct task_struct *tsk) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - tsk->pid, -1, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: new breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it */ -struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) +int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { - /* - * FIXME: do it without unregistering - * - We don't want to lose our slot - * - If the new bp is incorrect, don't lose the older one - */ - unregister_hw_breakpoint(bp); + u64 old_addr = bp->attr.bp_addr; + u64 old_len = bp->attr.bp_len; + int old_type = bp->attr.bp_type; + int err = 0; + + perf_event_disable(bp); + + bp->attr.bp_addr = attr->bp_addr; + bp->attr.bp_type = attr->bp_type; + bp->attr.bp_len = attr->bp_len; + + if (attr->disabled) + goto end; - return register_user_hw_breakpoint(addr, len, type, triggered, - tsk, active); + err = validate_hw_breakpoint(bp); + if (!err) + perf_event_enable(bp); + + if (err) { + bp->attr.bp_addr = old_addr; + bp->attr.bp_type = old_type; + bp->attr.bp_len = old_len; + if (!bp->attr.disabled) + perf_event_enable(bp); + + return err; + } + +end: + bp->attr.disabled = attr->disabled; + + return 0; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -389,68 +477,52 @@ void unregister_hw_breakpoint(struct perf_event *bp) } EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); -static struct perf_event * -register_kernel_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active) -{ - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - -1, cpu, active); -} - /** * register_wide_hw_breakpoint - register a wide breakpoint in the kernel - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint - * @active: should we activate it while registering it * * @return a set of per_cpu pointers to perf events */ -struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) +struct perf_event * __percpu * +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_overflow_handler_t triggered) { - struct perf_event **cpu_events, **pevent, *bp; + struct perf_event * __percpu *cpu_events, **pevent, *bp; long err; int cpu; cpu_events = alloc_percpu(typeof(*cpu_events)); if (!cpu_events) - return ERR_PTR(-ENOMEM); + return (void __percpu __force *)ERR_PTR(-ENOMEM); - for_each_possible_cpu(cpu) { + get_online_cpus(); + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = register_kernel_hw_breakpoint_cpu(addr, len, type, - triggered, cpu, active); + bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); *pevent = bp; - if (IS_ERR(bp) || !bp) { + if (IS_ERR(bp)) { err = PTR_ERR(bp); goto fail; } } + put_online_cpus(); return cpu_events; fail: - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - if (IS_ERR(*pevent) || !*pevent) + if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } + put_online_cpus(); + free_percpu(cpu_events); - /* return the error if any */ - return ERR_PTR(err); + return (void __percpu __force *)ERR_PTR(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); @@ -458,7 +530,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel * @cpu_events: the per cpu set of events to unregister */ -void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) +void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { int cpu; struct perf_event **pevent; @@ -479,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { static int __init init_hw_breakpoint(void) { + unsigned int **task_bp_pinned; + int cpu, err_cpu; + int i; + + for (i = 0; i < TYPE_MAX; i++) + nr_slots[i] = hw_breakpoint_slots(i); + + for_each_possible_cpu(cpu) { + for (i = 0; i < TYPE_MAX; i++) { + task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu); + *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i], + GFP_KERNEL); + if (!*task_bp_pinned) + goto err_alloc; + } + } + + constraints_initialized = 1; + return register_die_notifier(&hw_breakpoint_exceptions_nb); + + err_alloc: + for_each_possible_cpu(err_cpu) { + if (err_cpu == cpu) + break; + for (i = 0; i < TYPE_MAX; i++) + kfree(per_cpu(nr_task_bp_pinned[i], cpu)); + } + + return -ENOMEM; } core_initcall(init_hw_breakpoint); @@ -488,5 +589,4 @@ struct pmu perf_ops_bp = { .enable = arch_install_hw_breakpoint, .disable = arch_uninstall_hw_breakpoint, .read = hw_breakpoint_pmu_read, - .unthrottle = hw_breakpoint_pmu_unthrottle };