X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=arch%2Fx86%2Fkernel%2Fds.c;h=87b67e3a765ac212c2c954de3566ec487fdac8da;hb=82268da1b130f763d22d04f7d016bbf6fc8815c2;hp=6eb5d49a36bb8240ab41794375e35c537c108138;hpb=a95d67f87e1a5f1b4429be3ba3bf7b4051657908;p=safe%2Fjmp%2Flinux-2.6 diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 6eb5d49..87b67e3 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,27 +2,94 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - DS and BTS hardware configuration + * - buffer overflow handling (to be done) + * - buffer access * + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ + #include #include #include #include +#include +#include +#include + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the name of the configuration */ + const char *name; + /* the size of one pointer-typed field in the DS structure and + in the BTS and PEBS buffers in bytes; + this covers the first 8 DS fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; + /* a series of bit-masks to control various features indexed + * by enum ds_feature */ + unsigned long ctl[dsf_ctl_max]; +}; +static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); + +#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) + +#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ +#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ + +#define BTS_CONTROL \ + (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ + ds_cfg.ctl[dsf_bts_overflow]) + + +/* + * A BTS or PEBS tracer. + * + * This holds the configuration of the tracer and serves as a handle + * to identify tracers. + */ +struct ds_tracer { + /* the DS context (partially) owned by this tracer */ + struct ds_context *context; + /* the buffer provided on ds_request() and its size in bytes */ + void *buffer; + size_t size; +}; +struct bts_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* the trace including the DS configuration */ + struct bts_trace trace; + /* buffer overflow notification function */ + bts_ovfl_callback_t ovfl; +}; + +struct pebs_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* the trace including the DS configuration */ + struct pebs_trace trace; + /* buffer overflow notification function */ + pebs_ovfl_callback_t ovfl; +}; /* * Debug Store (DS) save area configuration (see Intel64 and IA32 @@ -44,385 +111,837 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. - * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) - -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, }; -/* - * The configuration for a particular DS/BTS hardware implementation. - */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The global configuration used by the below accessor functions + * Locking is done only for allocating BTS or PEBS resources. */ -static struct ds_configuration ds_cfg; +static DEFINE_SPINLOCK(ds_lock); + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline void *get_bts_buffer_base(char *base) +static atomic_t tracers = ATOMIC_INIT(0); + +static inline void get_tracer(struct task_struct *task) { - return *(void **)(base + ds_cfg.bts_buffer_base.offset); + if (task) + atomic_inc(&tracers); + else + atomic_dec(&tracers); } -static inline void set_bts_buffer_base(char *base, void *value) + +static inline void put_tracer(struct task_struct *task) { - (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value; + if (task) + atomic_dec(&tracers); + else + atomic_inc(&tracers); } -static inline void *get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(void **)(base + ds_cfg.bts_index.offset); + return task ? + (atomic_read(&tracers) >= 0) : + (atomic_read(&tracers) <= 0); } -static inline void set_bts_index(char *base, void *value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char ds[MAX_SIZEOF_DS]; + /* the owner of the BTS and PEBS configuration, respectively */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; +}; + +static DEFINE_PER_CPU(struct ds_context *, system_context_array); + +#define system_context per_cpu(system_context_array, smp_processor_id()) + + +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(void **)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &system_context); + struct ds_context *context = NULL; + struct ds_context *new_context = NULL; + unsigned long irq; + + /* Chances are small that we already have a context. */ + new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); + if (!new_context) + return NULL; + + spin_lock_irqsave(&ds_lock, irq); + + context = *p_context; + if (!context) { + context = new_context; + + context->this = p_context; + context->task = task; + context->count = 0; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); + + *p_context = context; + } + + context->count++; + + spin_unlock_irqrestore(&ds_lock, irq); + + if (context != new_context) + kfree(new_context); + + return context; } -static inline void *get_bts_absolute_maximum(char *base) + +static inline void ds_put_context(struct ds_context *context) { - return *(void **)(base + ds_cfg.bts_absolute_maximum.offset); + unsigned long irq; + + if (!context) + return; + + spin_lock_irqsave(&ds_lock, irq); + + if (--context->count) { + spin_unlock_irqrestore(&ds_lock, irq); + return; + } + + *(context->this) = NULL; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + spin_unlock_irqrestore(&ds_lock, irq); + + kfree(context); } -static inline void set_bts_absolute_maximum(char *base, void *value) + + +/* + * Call the tracer's callback on a buffer overflow. + * + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) { - (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + switch (qual) { + case ds_bts: + if (context->bts_master && + context->bts_master->ovfl) + context->bts_master->ovfl(context->bts_master); + break; + case ds_pebs: + if (context->pebs_master && + context->pebs_master->ovfl) + context->pebs_master->ovfl(context->pebs_master); + break; + } } -static inline void *get_bts_interrupt_threshold(char *base) + + +/* + * Write raw data into the BTS or PEBS buffer. + * + * The remainder of any partially written record is zeroed out. + * + * context: the DS context + * qual: the buffer type + * record: the data to write + * size: the size of the data + */ +static int ds_write(struct ds_context *context, enum ds_qualifier qual, + const void *record, size_t size) { - return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset); + int bytes_written = 0; + + if (!record) + return -EINVAL; + + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + break; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + bytes_written += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(context, qual); + } + + return bytes_written; } -static inline void set_bts_interrupt_threshold(char *base, void *value) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + * + * We use two levels of abstraction: + * - the raw data level defined here + * - an arch-independent level defined in ds.h + */ + +enum bts_field { + bts_from, + bts_to, + bts_flags, + + bts_qual = bts_from, + bts_jiffies = bts_to, + bts_pid = bts_flags, + + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) { - (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + base += (ds_cfg.sizeof_field * field); + return *(unsigned long *)base; } -static inline long get_from_ip(char *base) + +static inline void bts_set(char *base, enum bts_field field, unsigned long val) { - return *(long *)(base + ds_cfg.from_ip.offset); + base += (ds_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; } -static inline void set_from_ip(char *base, long value) + + +/* + * The raw BTS data is architecture dependent. + * + * For higher-level users, we give an arch-independent view. + * - ds.h defines struct bts_struct + * - bts_read translates one raw bts record into a bts_struct + * - bts_write translates one bts_struct into the raw format and + * writes it into the top of the parameter tracer's buffer. + * + * return: bytes read/written on success; -Eerrno, otherwise + */ +static int bts_read(struct bts_tracer *tracer, const void *at, + struct bts_struct *out) { - (*(long *)(base + ds_cfg.from_ip.offset)) = value; + if (!tracer) + return -EINVAL; + + if (at < tracer->trace.ds.begin) + return -EINVAL; + + if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) + return -EINVAL; + + memset(out, 0, sizeof(*out)); + if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { + out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); + out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); + out->variant.timestamp.pid = bts_get(at, bts_pid); + } else { + out->qualifier = bts_branch; + out->variant.lbr.from = bts_get(at, bts_from); + out->variant.lbr.to = bts_get(at, bts_to); + + if (!out->variant.lbr.from && !out->variant.lbr.to) + out->qualifier = bts_invalid; + } + + return ds_cfg.sizeof_rec[ds_bts]; } -static inline long get_to_ip(char *base) + +static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) { - return *(long *)(base + ds_cfg.to_ip.offset); + unsigned char raw[MAX_SIZEOF_BTS]; + + if (!tracer) + return -EINVAL; + + if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) + return -EOVERFLOW; + + switch (in->qualifier) { + case bts_invalid: + bts_set(raw, bts_from, 0); + bts_set(raw, bts_to, 0); + bts_set(raw, bts_flags, 0); + break; + case bts_branch: + bts_set(raw, bts_from, in->variant.lbr.from); + bts_set(raw, bts_to, in->variant.lbr.to); + bts_set(raw, bts_flags, 0); + break; + case bts_task_arrives: + case bts_task_departs: + bts_set(raw, bts_qual, (bts_escape | in->qualifier)); + bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); + bts_set(raw, bts_pid, in->variant.timestamp.pid); + break; + default: + return -EINVAL; + } + + return ds_write(tracer->ds.context, ds_bts, raw, + ds_cfg.sizeof_rec[ds_bts]); } -static inline void set_to_ip(char *base, long value) + + +static void ds_write_config(struct ds_context *context, + struct ds_trace *cfg, enum ds_qualifier qual) { - (*(long *)(base + ds_cfg.to_ip.offset)) = value; + unsigned char *ds = context->ds; + + ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); + ds_set(ds, qual, ds_index, (unsigned long)cfg->top); + ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); + ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); } -static inline unsigned char get_info_type(char *base) + +static void ds_read_config(struct ds_context *context, + struct ds_trace *cfg, enum ds_qualifier qual) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + unsigned char *ds = context->ds; + + cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); + cfg->top = (void *)ds_get(ds, qual, ds_index); + cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); + cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); } -static inline void set_info_type(char *base, unsigned char value) -{ - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + +static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, + void *base, size_t size, size_t ith, + unsigned int flags) { + unsigned long buffer, adj; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; + buffer += adj; + size -= adj; + + trace->n = size / ds_cfg.sizeof_rec[qual]; + trace->size = ds_cfg.sizeof_rec[qual]; + + size = (trace->n * trace->size); + + trace->begin = (void *)buffer; + trace->top = trace->begin; + trace->end = (void *)(buffer + size); + /* The value for 'no threshold' is -1, which will set the + * threshold outside of the buffer, just like we want it. + */ + trace->ith = (void *)(buffer + size - ith); + + trace->flags = flags; } -static inline unsigned long get_info_data(char *base) + + +static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, + enum ds_qualifier qual, struct task_struct *task, + void *base, size_t size, size_t th, unsigned int flags) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + int error; + + error = -EINVAL; + if (!base) + goto out; + + /* we require some space to do alignment adjustments below */ + error = -EINVAL; + if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) + goto out; + + if (th != (size_t)-1) { + th *= ds_cfg.sizeof_rec[qual]; + + error = -EINVAL; + if (size <= th) + goto out; + } + + tracer->buffer = base; + tracer->size = size; + + error = -ENOMEM; + context = ds_get_context(task); + if (!context) + goto out; + tracer->context = context; + + ds_init_ds_trace(trace, qual, base, size, th, flags); + + error = 0; + out: + return error; } -static inline void set_info_data(char *base, unsigned long value) + +struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; -} + struct bts_tracer *tracer; + unsigned long irq; + int error; + error = -EOPNOTSUPP; + if (!ds_cfg.ctl[dsf_bts]) + goto out; -int ds_allocate(void **dsp, size_t bts_size_in_bytes) -{ - size_t bts_size_in_records; - void *bts; - void *ds; + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; - if (bts_size_in_bytes < 0) - return -EINVAL; + error = ds_request(&tracer->ds, &tracer->trace.ds, + ds_bts, task, base, size, th, flags); + if (error < 0) + goto out_tracer; - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; - if (bts_size_in_bytes <= 0) - return -EINVAL; + spin_lock_irqsave(&ds_lock, irq); - bts = kzalloc(bts_size_in_bytes, GFP_KERNEL); + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + get_tracer(task); - if (!bts) - return -ENOMEM; + error = -EPERM; + if (tracer->ds.context->bts_master) + goto out_put_tracer; + tracer->ds.context->bts_master = tracer; - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + spin_unlock_irqrestore(&ds_lock, irq); - if (!ds) { - kfree(bts); - return -ENOMEM; - } - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); + tracer->trace.read = bts_read; + tracer->trace.write = bts_write; - *dsp = ds; - return 0; + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_resume_bts(tracer); + + return tracer; + + out_put_tracer: + put_tracer(task); + out_unlock: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(tracer->ds.context); + out_tracer: + kfree(tracer); + out: + return ERR_PTR(error); } -int ds_free(void **dsp) +struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { - if (*dsp) - kfree(get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = 0; + struct pebs_tracer *tracer; + unsigned long irq; + int error; - return 0; + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) + goto out; + + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; + + error = ds_request(&tracer->ds, &tracer->trace.ds, + ds_pebs, task, base, size, th, flags); + if (error < 0) + goto out_tracer; + + spin_lock_irqsave(&ds_lock, irq); + + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + get_tracer(task); + + error = -EPERM; + if (tracer->ds.context->pebs_master) + goto out_put_tracer; + tracer->ds.context->pebs_master = tracer; + + spin_unlock_irqrestore(&ds_lock, irq); + + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + ds_resume_pebs(tracer); + + return tracer; + + out_put_tracer: + put_tracer(task); + out_unlock: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(tracer->ds.context); + out_tracer: + kfree(tracer); + out: + return ERR_PTR(error); } -int ds_get_bts_size(void *ds) +void ds_release_bts(struct bts_tracer *tracer) { - size_t size_in_bytes; + if (!tracer) + return; + + ds_suspend_bts(tracer); - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); + tracer->ds.context->bts_master = NULL; - if (!ds) - return 0; + put_tracer(tracer->ds.context->task); + ds_put_context(tracer->ds.context); - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + kfree(tracer); } -int ds_get_bts_end(void *ds) +void ds_suspend_bts(struct bts_tracer *tracer) { - size_t size_in_bytes = ds_get_bts_size(ds); + struct task_struct *task; - if (size_in_bytes <= 0) - return size_in_bytes; + if (!tracer) + return; - return size_in_bytes / ds_cfg.sizeof_bts; + task = tracer->ds.context->task; + + if (!task || (task == current)) + update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); + + if (task) { + task->thread.debugctlmsr &= ~BTS_CONTROL; + + if (!task->thread.debugctlmsr) + clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + } } -int ds_get_bts_index(void *ds) +void ds_resume_bts(struct bts_tracer *tracer) { - size_t index_offset_in_bytes; + struct task_struct *task; + unsigned long control; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!tracer) + return; - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + task = tracer->ds.context->task; - return index_offset_in_bytes / ds_cfg.sizeof_bts; + control = ds_cfg.ctl[dsf_bts]; + if (!(tracer->trace.ds.flags & BTS_KERNEL)) + control |= ds_cfg.ctl[dsf_bts_kernel]; + if (!(tracer->trace.ds.flags & BTS_USER)) + control |= ds_cfg.ctl[dsf_bts_user]; + + if (task) { + task->thread.debugctlmsr |= control; + set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + } + + if (!task || (task == current)) + update_debugctlmsr(get_debugctlmsr() | control); } -int ds_set_overflow(void *ds, int method) +void ds_release_pebs(struct pebs_tracer *tracer) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + if (!tracer) + return; + + ds_suspend_pebs(tracer); + + WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); + tracer->ds.context->pebs_master = NULL; + + put_tracer(tracer->ds.context->task); + ds_put_context(tracer->ds.context); + + kfree(tracer); } -int ds_get_overflow(void *ds) +void ds_suspend_pebs(struct pebs_tracer *tracer) { - return DS_O_WRAP; + } -int ds_clear(void *ds) +void ds_resume_pebs(struct pebs_tracer *tracer) { - int bts_size = ds_get_bts_size(ds); - void *bts_base; - if (bts_size <= 0) - return bts_size; +} - bts_base = get_bts_buffer_base(ds); - memset(bts_base, 0, bts_size); +const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) +{ + if (!tracer) + return NULL; - set_bts_index(ds, bts_base); - return 0; + ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + return &tracer->trace; } -int ds_read_bts(void *ds, size_t index, struct bts_struct *out) +const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) { - void *bts; + if (!tracer) + return NULL; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + tracer->trace.reset_value = + *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); - if (index < 0) - return -EINVAL; + return &tracer->trace; +} - if (index >= ds_get_bts_size(ds)) +int ds_reset_bts(struct bts_tracer *tracer) +{ + if (!tracer) return -EINVAL; - bts = get_bts_buffer_base(ds); - bts = (char *)bts + (index * ds_cfg.sizeof_bts); + tracer->trace.ds.top = tracer->trace.ds.begin; - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(tracer->ds.context->ds, ds_bts, ds_index, + (unsigned long)tracer->trace.ds.top); return 0; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_pebs(struct pebs_tracer *tracer) { - void *bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + if (!tracer) + return -EINVAL; - bts = get_bts_index(ds); + tracer->trace.ds.top = tracer->trace.ds.begin; - memset(bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; + ds_set(tracer->ds.context->ds, ds_bts, ds_index, + (unsigned long)tracer->trace.ds.top); - case BTS_BRANCH: - set_from_ip(bts, in->variant.lbr.from_ip); - set_to_ip(bts, in->variant.lbr.to_ip); - break; - - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip(bts, BTS_ESCAPE_ADDRESS); - set_info_type(bts, in->qualifier); - set_info_data(bts, in->variant.jiffies); - break; + return 0; +} - default: +int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) +{ + if (!tracer) return -EINVAL; - } - bts = (char *)bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; return 0; } -unsigned long ds_debugctl_mask(void) -{ - return ds_cfg.debugctl_mask; -} +static const struct ds_configuration ds_cfg_netburst = { + .name = "Netburst", + .ctl[dsf_bts] = (1 << 2) | (1 << 3), + .ctl[dsf_bts_kernel] = (1 << 5), + .ctl[dsf_bts_user] = (1 << 6), + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, #ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) + .sizeof_rec[ds_pebs] = sizeof(long) * 10, +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18, +#endif }; - static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + .name = "Pentium M", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ + .sizeof_rec[ds_pebs] = sizeof(long) * 10, +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18, +#endif }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_core2_atom = { + .name = "Core 2/Atom", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + .ctl[dsf_bts_kernel] = (1 << 9), + .ctl[dsf_bts_user] = (1 << 10), + + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 18, }; -static inline void +static void ds_configure(const struct ds_configuration *cfg) { + memset(&ds_cfg, 0, sizeof(ds_cfg)); ds_cfg = *cfg; + + printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); + + if (!cpu_has_bts) { + ds_cfg.ctl[dsf_bts] = 0; + printk(KERN_INFO "[ds] bts not available\n"); + } + if (!cpu_has_pebs) + printk(KERN_INFO "[ds] pebs not available\n"); + + WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -430,29 +949,28 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ - case 0xD: - case 0xE: /* Pentium M */ + case 0x9: + case 0xd: /* Pentium M */ ds_configure(&ds_cfg_pentium_m); break; -#endif /* _i386_ */ - case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0xf: + case 0x17: /* Core2 */ + case 0x1c: /* Atom */ + ds_configure(&ds_cfg_core2_atom); break; + case 0x1a: /* i7 */ default: /* sorry, don't know about them */ break; } break; - case 0xF: + case 0xf: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ ds_configure(&ds_cfg_netburst); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -463,3 +981,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +/* + * Change the DS configuration from tracing prev to tracing next. + */ +void ds_switch_to(struct task_struct *prev, struct task_struct *next) +{ + struct ds_context *prev_ctx = prev->thread.ds_ctx; + struct ds_context *next_ctx = next->thread.ds_ctx; + + if (prev_ctx) { + update_debugctlmsr(0); + + if (prev_ctx->bts_master && + (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { + struct bts_struct ts = { + .qualifier = bts_task_departs, + .variant.timestamp.jiffies = jiffies_64, + .variant.timestamp.pid = prev->pid + }; + bts_write(prev_ctx->bts_master, &ts); + } + } + + if (next_ctx) { + if (next_ctx->bts_master && + (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { + struct bts_struct ts = { + .qualifier = bts_task_arrives, + .variant.timestamp.jiffies = jiffies_64, + .variant.timestamp.pid = next->pid + }; + bts_write(next_ctx->bts_master, &ts); + } + + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); + } + + update_debugctlmsr(next->thread.debugctlmsr); +} + +void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) +{ + clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); + tsk->thread.ds_ctx = NULL; +} + +void ds_exit_thread(struct task_struct *tsk) +{ +}