* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
+#include <linux/rwsem.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
*/
static int tracing_disabled = 1;
-DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+DEFINE_PER_CPU(int, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
{
preempt_disable();
- local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); /* count up on this cpu; preemption was disabled above and is left disabled on return */
}
static inline void ftrace_enable_cpu(void)
{
- local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); /* count down on this cpu before preemption is re-enabled below */
preempt_enable();
}
static cpumask_var_t __read_mostly tracing_buffer_mask;
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t tracing_reader_cpumask;
-
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
*/
static struct trace_array max_tr;
-static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
/*
* trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
*/
static DEFINE_MUTEX(trace_types_lock);
+/*
+ * Serialize access to the ring buffer.
+ *
+ * The ring buffer serializes readers, but that is only low-level protection.
+ * The validity of the events (as returned by ring_buffer_peek() etc.)
+ * is not protected by the ring buffer.
+ *
+ * The content of events may become garbage if we allow other processes to
+ * consume these events concurrently:
+ * A) the page of the consumed events may become a normal page
+ * (not a reader page) in the ring buffer, and this page will be
+ * rewritten by the events producer.
+ * B) the page of the consumed events may become a page for splice_read,
+ * and this page will be returned to the system.
+ *
+ * These primitives allow multiple processes to access different cpu ring
+ * buffers concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multiple read-only accesses are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+/*
+ * trace_access_lock - take the lock(s) guarding ring buffer access for @cpu.
+ *
+ * @cpu: a cpu number, or TRACE_PIPE_ALL_CPU for the whole ring buffer.
+ *
+ * TRACE_PIPE_ALL_CPU takes all_cpu_access_lock for writing. Any other
+ * value takes all_cpu_access_lock for reading and then that cpu's
+ * cpu_access_lock mutex. Both paths may sleep (rwsem and mutex).
+ * Release with trace_access_unlock() using the same @cpu.
+ */
+static inline void trace_access_lock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ /* Whole ring buffer: hold the rwsem as a writer. */
+ down_write(&all_cpu_access_lock);
+ } else {
+ /* Single cpu ring buffer: two-step acquisition. */
+
+ /* First, block other trace_access_lock(TRACE_PIPE_ALL_CPU) callers. */
+ down_read(&all_cpu_access_lock);
+
+ /* Second, block other access to this @cpu's ring buffer. */
+ mutex_lock(&per_cpu(cpu_access_lock, cpu));
+ }
+}
+/*
+ * trace_access_unlock - release what trace_access_lock() took for @cpu.
+ *
+ * @cpu: the same value that was passed to trace_access_lock().
+ *
+ * In the per-cpu case the mutex is dropped before the read side of
+ * all_cpu_access_lock, i.e. in the reverse order of acquisition.
+ */
+static inline void trace_access_unlock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ up_write(&all_cpu_access_lock);
+ } else {
+ mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+ up_read(&all_cpu_access_lock);
+ }
+}
+/*
+ * trace_access_lock_init - run mutex_init() on the cpu_access_lock of
+ * every possible cpu. Only the per-cpu mutexes are initialized here.
+ */
+static inline void trace_access_lock_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+/*
+ * trace_access_lock - !CONFIG_SMP variant: the single access_lock mutex
+ * covers every ring buffer access.
+ *
+ * @cpu: ignored.
+ */
+static inline void trace_access_lock(int cpu)
+{
+ (void)cpu;
+ mutex_lock(&access_lock);
+}
+/*
+ * trace_access_unlock - !CONFIG_SMP variant: release access_lock.
+ *
+ * @cpu: ignored.
+ */
+static inline void trace_access_unlock(int cpu)
+{
+ (void)cpu;
+ mutex_unlock(&access_lock);
+}
+/*
+ * trace_access_lock_init - !CONFIG_SMP variant: empty.
+ * access_lock is defined with DEFINE_MUTEX() and this branch declares
+ * no per-cpu mutexes, so there is nothing to set up at run time.
+ */
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
* protected by per_cpu spinlocks. But the action of the swap
* needs its own lock.
*
- * This is defined as a raw_spinlock_t in order to help
+ * This is defined as an arch_spinlock_t in order to help
* with performance when lockdep debugging is enabled.
*
* It is also used in other places outside the update_max_tr
* so it needs to be defined outside of the
* CONFIG_TRACER_MAX_TRACE.
*/
-static raw_spinlock_t ftrace_max_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t ftrace_max_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
/**
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
-static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
*/
- if (!__raw_spin_trylock(&trace_cmdline_lock))
+ if (!arch_spin_trylock(&trace_cmdline_lock))
return;
idx = map_pid_to_cmdline[tsk->pid];
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
}
void trace_find_cmdline(int pid, char comm[])
return;
}
+ if (WARN_ON_ONCE(pid < 0)) {
+ strcpy(comm, "<XXX>");
+ return;
+ }
+
if (pid > PID_MAX_DEFAULT) {
strcpy(comm, "<...>");
return;
}
preempt_disable();
- __raw_spin_lock(&trace_cmdline_lock);
+ arch_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
}
struct ftrace_entry *entry;
/* If we are reading the ring buffer, don't trace */
- if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static u32 trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_bprint;
/* Lockdep uses trace_printk for lock tracing */
local_irq_save(flags);
- __raw_spin_lock(&trace_buf_lock);
+ arch_spin_lock(&trace_buf_lock);
len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
if (len > TRACE_BUF_SIZE || len < 0)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, flags, 6, pc);
+ }
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
local_irq_restore(flags);
out:
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_print;
pause_graph_tracing();
raw_local_irq_save(irq_flags);
- __raw_spin_lock(&trace_buf_lock);
+ arch_spin_lock(&trace_buf_lock);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
size = sizeof(*entry) + len + 1;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 6, pc);
+ }
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
raw_local_irq_restore(irq_flags);
unpause_graph_tracing();
out:
}
/*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
* The current tracer is copied to avoid a global locking
* all around.
*/
}
trace_event_read_lock();
+ trace_access_lock(cpu_file);
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
+ struct trace_iterator *iter = m->private;
+ /*
+  * Drop the access lock for iter->cpu_file before the event read lock.
+  * The hunk just above takes them in the opposite order
+  * (trace_event_read_lock() and then trace_access_lock()).
+  */
atomic_dec(&trace_record_cmdline_disabled);
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
* Increase/decrease the disabled counter if we are
atomic_dec(&global_trace.data[cpu]->disabled);
}
}
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
local_irq_enable();
cpumask_copy(tracing_cpumask, tracing_cpumask_new);
mutex_lock(&trace_types_lock);
- /* We only allow one reader per cpu */
- if (cpu_file == TRACE_PIPE_ALL_CPU) {
- if (!cpumask_empty(tracing_reader_cpumask)) {
- ret = -EBUSY;
- goto out;
- }
- cpumask_setall(tracing_reader_cpumask);
- } else {
- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
- cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
- else {
- ret = -EBUSY;
- goto out;
- }
- }
-
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
mutex_lock(&trace_types_lock);
- if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
- cpumask_clear(tracing_reader_cpumask);
- else
- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
iter->pos = -1;
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
while (find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
if (iter->seq.len >= cnt)
break;
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
/* Now copy what we have to the user */
__free_page(spd->pages[idx]);
}
-static struct pipe_buf_operations tracing_pipe_buf_ops = {
+static const struct pipe_buf_operations tracing_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
}
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
/* Fill as many pages as possible. */
for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
trace_seq_init(&iter->seq);
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
mutex_unlock(&iter->mutex);
info->read = 0;
+ trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
count,
info->cpu, 0);
+ trace_access_unlock(info->cpu);
if (ret < 0)
return 0;
}
/* Pipe buffer operations for a buffer. */
-static struct pipe_buf_operations buffer_pipe_buf_ops = {
+static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
len &= PAGE_MASK;
}
+ trace_access_lock(info->cpu);
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
}
+ trace_access_unlock(info->cpu);
spd.nr_pages = i;
/* did we read anything? */
if (!!(topt->flags->val & topt->opt->bit) != val) {
mutex_lock(&trace_types_lock);
ret = __set_tracer_option(current_trace, topt->flags,
- topt->opt, val);
+ topt->opt, !val);
mutex_unlock(&trace_types_lock);
if (ret)
return ret;
struct dentry *d_tracer;
int cpu;
+ trace_access_lock_init();
+
d_tracer = tracing_init_dentry();
trace_create_file("tracing_enabled", 0644, d_tracer,
static void __ftrace_dump(bool disable_tracing)
{
- static raw_spinlock_t ftrace_dump_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t ftrace_dump_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
unsigned int old_userobj;
/* only one dump */
local_irq_save(flags);
- __raw_spin_lock(&ftrace_dump_lock);
+ arch_spin_lock(&ftrace_dump_lock);
if (dump_ran)
goto out;
}
out:
- __raw_spin_unlock(&ftrace_dump_lock);
+ arch_spin_unlock(&ftrace_dump_lock);
local_irq_restore(flags);
}
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
- goto out_free_tracing_cpumask;
-
/* To save memory, keep the ring buffer size to its minimum */
if (ring_buffer_expanded)
ring_buf_size = trace_buf_size;
/* Allocate the first page for all buffers */
for_each_tracing_cpu(i) {
global_trace.data[i] = &per_cpu(global_trace_cpu, i);
- max_tr.data[i] = &per_cpu(max_data, i);
+ max_tr.data[i] = &per_cpu(max_tr_data, i);
}
trace_init_cmdlines();
return 0;
out_free_cpumask:
- free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
free_cpumask_var(tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);