perf_counter: Fix a race on perf_counter_ctx

perf_counter_task() picks the context up via
rcu_dereference(current->perf_counter_ctxp), and the exit path
reported PERF_EVENT_EXIT before clearing child->perf_counter_ctxp, so
the event could be delivered against a context that was concurrently
being swapped or torn down.

Close the race by letting callers pass the context explicitly:
perf_counter_exit_task() now clears child->perf_counter_ctxp under
child_ctx->lock and only then reports the exit against the child_ctx
it already holds, while perf_counter_fork() passes NULL and keeps the
RCU lookup, now done against the task the event is about rather than
against current.

While at it, replace the tracepoint-private PERF_SAMPLE_TP_RECORD /
perf_tracepoint_record with a generic PERF_SAMPLE_RAW record carried
in perf_sample_data::raw, and make perf_counter_output() NULL-check
it before sizing and copying the raw payload.
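For reference, here is a minimal sketch of the ordering the exit path
adopts: detach the published pointer under the lock first, then report
against the pinned context. It is userspace code with pthreads standing
in for the kernel's spinlock/RCU machinery, and every name in it
(task_ctxp, report_task_event, task_exit) is illustrative, not kernel
API:

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct ctx {
          int nr_counters;
  };

  static struct ctx *task_ctxp;   /* analogue of task->perf_counter_ctxp */
  static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

  /*
   * Analogue of perf_counter_task() after the fix: the caller hands
   * the context in instead of the reporter re-reading the shared
   * pointer.
   */
  static void report_task_event(struct ctx *ctx)
  {
          if (ctx)
                  printf("EXIT event against ctx with %d counters\n",
                         ctx->nr_counters);
  }

  /* Analogue of perf_counter_exit_task() after the fix. */
  static void task_exit(void)
  {
          struct ctx *child_ctx;

          pthread_mutex_lock(&ctx_lock);
          child_ctx = task_ctxp;
          task_ctxp = NULL;       /* unpublish before reporting */
          pthread_mutex_unlock(&ctx_lock);

          /* Safe: nobody can look the context up any more. */
          report_task_event(child_ctx);
          free(child_ctx);
  }

  int main(void)
  {
          task_ctxp = calloc(1, sizeof(*task_ctxp));
          if (!task_ctxp)
                  return 1;
          task_ctxp->nr_counters = 3;
          task_exit();
          return 0;
  }

In the kernel the lookup side is RCU rather than the mutex shown
above, but the property relied on is the same: once
child->perf_counter_ctxp is NULL, the fallback rcu_dereference() in
perf_counter_task_event() can no longer hand back a context the exit
path is about to free.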
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 615440a..546e62d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2646,7 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                u64 counter;
        } group_entry;
        struct perf_callchain_entry *callchain = NULL;
-       struct perf_tracepoint_record *tp;
+       struct perf_raw_record *raw = NULL;
        int callchain_size = 0;
        u64 time;
        struct {
@@ -2715,9 +2715,10 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                        header.size += sizeof(u64);
        }
 
-       if (sample_type & PERF_SAMPLE_TP_RECORD) {
-               tp = data->private;
-               header.size += tp->size;
+       if (sample_type & PERF_SAMPLE_RAW) {
+               raw = data->raw;
+               if (raw)
+                       header.size += raw->size;
        }
 
        ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2783,8 +2784,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
                }
        }
 
-       if (sample_type & PERF_SAMPLE_TP_RECORD)
-               perf_output_copy(&handle, tp->record, tp->size);
+       if ((sample_type & PERF_SAMPLE_RAW) && raw)
+               perf_output_copy(&handle, raw->data, raw->size);
 
        perf_output_end(&handle);
 }
@@ -2849,7 +2850,8 @@ perf_counter_read_event(struct perf_counter *counter,
  */
 
 struct perf_task_event {
-       struct task_struct      *task;
+       struct task_struct              *task;
+       struct perf_counter_context     *task_ctx;
 
        struct {
                struct perf_event_header        header;
@@ -2909,24 +2911,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
 static void perf_counter_task_event(struct perf_task_event *task_event)
 {
        struct perf_cpu_context *cpuctx;
-       struct perf_counter_context *ctx;
+       struct perf_counter_context *ctx = task_event->task_ctx;
 
        cpuctx = &get_cpu_var(perf_cpu_context);
        perf_counter_task_ctx(&cpuctx->ctx, task_event);
        put_cpu_var(perf_cpu_context);
 
        rcu_read_lock();
-       /*
-        * doesn't really matter which of the child contexts the
-        * events ends up in.
-        */
-       ctx = rcu_dereference(current->perf_counter_ctxp);
+       if (!ctx)
+               ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
        if (ctx)
                perf_counter_task_ctx(ctx, task_event);
        rcu_read_unlock();
 }
 
-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+                             struct perf_counter_context *task_ctx,
+                             int new)
 {
        struct perf_task_event task_event;
 
@@ -2936,8 +2937,9 @@ static void perf_counter_task(struct task_struct *task, int new)
                return;
 
        task_event = (struct perf_task_event){
-               .task   = task,
-               .event  = {
+               .task     = task,
+               .task_ctx = task_ctx,
+               .event    = {
                        .header = {
                                .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
                                .misc = 0,
@@ -2955,7 +2957,7 @@ static void perf_counter_task(struct task_struct *task, int new)
 
 void perf_counter_fork(struct task_struct *task)
 {
-       perf_counter_task(task, 1);
+       perf_counter_task(task, NULL, 1);
 }
 
 /*
@@ -3739,15 +3741,15 @@ static const struct pmu perf_ops_task_clock = {
 void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
                          int entry_size)
 {
-       struct perf_tracepoint_record tp = {
+       struct perf_raw_record raw = {
                .size = entry_size,
-               .record = record,
+               .data = record,
        };
 
        struct perf_sample_data data = {
                .regs = get_irq_regs(),
                .addr = addr,
-               .private = &tp,
+               .raw = &raw,
        };
 
        if (!data.regs)
@@ -4309,7 +4311,7 @@ void perf_counter_exit_task(struct task_struct *child)
        unsigned long flags;
 
        if (likely(!child->perf_counter_ctxp)) {
-               perf_counter_task(child, 0);
+               perf_counter_task(child, NULL, 0);
                return;
        }
 
@@ -4329,6 +4331,7 @@ void perf_counter_exit_task(struct task_struct *child)
         * incremented the context's refcount before we do put_ctx below.
         */
        spin_lock(&child_ctx->lock);
+       child->perf_counter_ctxp = NULL;
        /*
         * If this context is a clone; unclone it so it can't get
         * swapped to another process while we're removing all
@@ -4342,9 +4345,7 @@ void perf_counter_exit_task(struct task_struct *child)
         * won't get any samples after PERF_EVENT_EXIT. We can however still
         * get a few PERF_EVENT_READ events.
         */
-       perf_counter_task(child, 0);
-
-       child->perf_counter_ctxp = NULL;
+       perf_counter_task(child, child_ctx, 0);
 
        /*
         * We can recurse on the same lock type through: