Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
author Ingo Molnar <mingo@elte.hu>
Mon, 3 May 2010 06:29:35 +0000 (08:29 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 3 May 2010 06:29:35 +0000 (08:29 +0200)
28 files changed:
arch/Kconfig
arch/sh/Kconfig
arch/sh/include/asm/hw_breakpoint.h
arch/sh/kernel/hw_breakpoint.c
arch/sh/kernel/ptrace_32.c
arch/x86/Kconfig
arch/x86/include/asm/hw_breakpoint.h
arch/x86/kernel/hw_breakpoint.c
arch/x86/kernel/ptrace.c
include/linux/hw_breakpoint.h
kernel/hw_breakpoint.c
kernel/trace/trace_ksym.c
tools/perf/Documentation/perf-trace-perl.txt
tools/perf/Documentation/perf-trace-python.txt
tools/perf/scripts/perl/bin/check-perf-trace-record
tools/perf/scripts/perl/bin/failed-syscalls-record
tools/perf/scripts/perl/bin/rw-by-file-record
tools/perf/scripts/perl/bin/rw-by-pid-record
tools/perf/scripts/perl/bin/rwtop-record
tools/perf/scripts/perl/bin/wakeup-latency-record
tools/perf/scripts/perl/bin/workqueue-stats-record
tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
tools/perf/scripts/python/bin/sctop-record
tools/perf/scripts/python/bin/syscall-counts-by-pid-record
tools/perf/scripts/python/bin/syscall-counts-record
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event.h

index f06010f..acda512 100644
@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT
        bool
        depends on PERF_EVENTS
 
+config HAVE_MIXED_BREAKPOINTS_REGS
+       bool
+       depends on HAVE_HW_BREAKPOINT
+       help
+         Depending on the arch implementation of hardware breakpoints,
+         some architectures have separate registers for data and
+         instruction breakpoint addresses, while others have mixed
+         registers that store either kind of address and define the
+         access type in a control register.
+         Select this option if your arch implements breakpoints in the
+         latter fashion.
+
 config HAVE_USER_RETURN_NOTIFIER
        bool
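
The practical consequence of this option shows up in the bp_type_idx enum
added to include/linux/hw_breakpoint.h later in this merge: with mixed
registers TYPE_DATA aliases TYPE_INST, so TYPE_MAX evaluates to 1 and data
and instruction breakpoints are accounted against a single slot pool. A
minimal, self-contained sketch of that effect (illustrative only, not part
of the patch):

/* Sketch: how the number of slot pools follows the register layout. */
#include <stdio.h>

enum bp_type_idx {
	TYPE_INST = 0,
#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
	TYPE_DATA = 0,	/* shares the instruction pool (x86, SuperH) */
#else
	TYPE_DATA = 1,	/* independent pool on split-register arches */
#endif
	TYPE_MAX
};

int main(void)
{
	printf("slot pools to size: %d\n", (int)TYPE_MAX);
	return 0;
}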
 
index 8d90564..e6d8ab5 100644
@@ -44,6 +44,7 @@ config SUPERH32
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_ARCH_KGDB
        select HAVE_HW_BREAKPOINT
+       select HAVE_MIXED_BREAKPOINTS_REGS
        select PERF_EVENTS if HAVE_HW_BREAKPOINT
        select ARCH_HIBERNATION_POSSIBLE if MMU
 
index 965dd78..e14cad9 100644
@@ -46,10 +46,14 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM                2
 
+static inline int hw_breakpoint_slots(int type)
+{
+       return HBP_NUM;
+}
+
 /* arch/sh/kernel/hw_breakpoint.c */
-extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-                                        struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
                                           unsigned long val, void *data);
 
index 675eea7..1f2cf62 100644
@@ -120,25 +120,16 @@ static int get_hbp_len(u16 hbp_len)
 }
 
 /*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
-{
-       unsigned int len;
-
-       len = get_hbp_len(hbp_len);
-
-       return (va <= TASK_SIZE - len);
-}
-
-/*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
        unsigned int len;
+       unsigned long va;
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-       len = get_hbp_len(hbp_len);
+       va = info->address;
+       len = get_hbp_len(info->len);
 
        return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-                                 struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned int align;
@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
        if (info->address & align)
                return -EINVAL;
 
-       /* Check that the virtual address is in the proper range */
-       if (tsk) {
-               if (!arch_check_va_in_userspace(info->address, info->len))
-                       return -EFAULT;
-       } else {
-               if (!arch_check_va_in_kernelspace(info->address, info->len))
-                       return -EFAULT;
-       }
-
        return 0;
 }
 
@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
                perf_bp_event(bp, args->regs);
 
                /* Deliver the signal to userspace */
-               if (arch_check_va_in_userspace(bp->attr.bp_addr,
-                                              bp->attr.bp_len)) {
+               if (!arch_check_bp_in_kernelspace(bp)) {
                        siginfo_t info;
 
                        info.si_signo = args->signr;
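
The predicate that replaces the old user/kernel pair treats a breakpoint as
kernel space only when its whole range sits at or above TASK_SIZE, so the
signal-delivery path above only needs the negation. A standalone sketch of
the check (TASK_SIZE here is an assumed placeholder for the arch's
user/kernel address split):

/* Standalone sketch of arch_check_bp_in_kernelspace()'s core test. */
#include <stdbool.h>

#define TASK_SIZE 0x80000000UL	/* placeholder value for illustration */

static bool bp_in_kernelspace(unsigned long va, unsigned int len)
{
	/* Both the first and the last byte of the range must be at or
	 * above TASK_SIZE for the breakpoint to count as kernel space. */
	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}

With this in place the exception handler only has to test
!bp_in_kernelspace() to know the SIGTRAP belongs to userspace.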
index 7759a9a..d4104ce 100644
@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 
        bp = thread->ptrace_bps[0];
        if (!bp) {
-               hw_breakpoint_init(&attr);
+               ptrace_breakpoint_init(&attr);
 
                attr.bp_addr = addr;
                attr.bp_len = HW_BREAKPOINT_LEN_2;
index 97a95df..01177dc 100644
@@ -53,6 +53,7 @@ config X86
        select HAVE_KERNEL_LZMA
        select HAVE_KERNEL_LZO
        select HAVE_HW_BREAKPOINT
+       select HAVE_MIXED_BREAKPOINTS_REGS
        select PERF_EVENTS
        select ANON_INODES
        select HAVE_ARCH_KMEMCHECK
index 2a1bd8f..9422553 100644
@@ -41,12 +41,16 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
+static inline int hw_breakpoint_slots(int type)
+{
+       return HBP_NUM;
+}
+
 struct perf_event;
 struct pmu;
 
-extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
-                                        struct task_struct *tsk);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
                                           unsigned long val, void *data);
 
index d6cc065..a8f1b80 100644
@@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len)
 }
 
 /*
- * Check for virtual address in user space.
- */
-int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
-{
-       unsigned int len;
-
-       len = get_hbp_len(hbp_len);
-
-       return (va <= TASK_SIZE - len);
-}
-
-/*
  * Check for virtual address in kernel space.
  */
-static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
 {
        unsigned int len;
+       unsigned long va;
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-       len = get_hbp_len(hbp_len);
+       va = info->address;
+       len = get_hbp_len(info->len);
 
        return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp,
-                                 struct task_struct *tsk)
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned int align;
@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 
        ret = -EINVAL;
 
-       if (info->type == X86_BREAKPOINT_EXECUTE)
-               /*
-                * Ptrace-refactoring code
-                * For now, we'll allow instruction breakpoint only for user-space
-                * addresses
-                */
-               if ((!arch_check_va_in_userspace(info->address, info->len)) &&
-                       info->len != X86_BREAKPOINT_EXECUTE)
-                       return ret;
-
        switch (info->len) {
        case X86_BREAKPOINT_LEN_1:
                align = 0;
@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
        if (info->address & align)
                return -EINVAL;
 
-       /* Check that the virtual address is in the proper range */
-       if (tsk) {
-               if (!arch_check_va_in_userspace(info->address, info->len))
-                       return -EFAULT;
-       } else {
-               if (!arch_check_va_in_kernelspace(info->address, info->len))
-                       return -EFAULT;
-       }
-
        return 0;
 }
 
index 055be0a..70c4872 100644
@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
        struct perf_event_attr attr;
 
        if (!t->ptrace_bps[nr]) {
-               hw_breakpoint_init(&attr);
+               ptrace_breakpoint_init(&attr);
                /*
                 * Put stub len and type to register (reserve) an inactive but
                 * correct bp
index c70d27a..a2d6ea4 100644
@@ -9,9 +9,22 @@ enum {
 };
 
 enum {
-       HW_BREAKPOINT_R = 1,
-       HW_BREAKPOINT_W = 2,
-       HW_BREAKPOINT_X = 4,
+       HW_BREAKPOINT_EMPTY     = 0,
+       HW_BREAKPOINT_R         = 1,
+       HW_BREAKPOINT_W         = 2,
+       HW_BREAKPOINT_RW        = HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+       HW_BREAKPOINT_X         = 4,
+       HW_BREAKPOINT_INVALID   = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
+};
+
+enum bp_type_idx {
+       TYPE_INST       = 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+       TYPE_DATA       = 0,
+#else
+       TYPE_DATA       = 1,
+#endif
+       TYPE_MAX
 };
 
 #ifdef __KERNEL__
@@ -34,6 +47,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
        attr->sample_period = 1;
 }
 
+static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
+{
+       hw_breakpoint_init(attr);
+       attr->exclude_kernel = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
        return bp->attr.bp_addr;
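
ptrace_breakpoint_init() differs from hw_breakpoint_init() only by setting
exclude_kernel, which validate_hw_breakpoint() (added below in
kernel/hw_breakpoint.c) turns into a hard -EINVAL for kernel-space
addresses. The ptrace call sites converted by this merge follow the pattern
sketched here; the bp_type chosen is an assumption for illustration, and
addr stands in for whatever address the tracer supplied:

#include <linux/hw_breakpoint.h>

/* Sketch of a converted ptrace call site (cf. the sh and x86 hunks). */
static void init_ptrace_bp_attr(struct perf_event_attr *attr,
				unsigned long addr)
{
	ptrace_breakpoint_init(attr);	/* hw_breakpoint_init() + exclude_kernel */
	attr->bp_addr = addr;
	attr->bp_len  = HW_BREAKPOINT_LEN_2;
	attr->bp_type = HW_BREAKPOINT_W;	/* assumed type for this sketch */
}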
index 03808ed..684b710 100644
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+       return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+       if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+               return TYPE_DATA;
+
+       return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * has in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
        int i;
-       unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+       unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-       for (i = HBP_NUM -1; i >= 0; i--) {
+       for (i = nr_slots[type] - 1; i >= 0; i--) {
                if (tsk_pinned[i] > 0)
                        return i + 1;
        }
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
        return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
        struct perf_event_context *ctx = tsk->perf_event_ctxp;
        struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
         */
        list_for_each_entry(bp, list, event_entry) {
                if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-                       count++;
+                       if (find_slot_idx(bp) == type)
+                               count += hw_breakpoint_weight(bp);
        }
 
        raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+                   enum bp_type_idx type)
 {
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;
 
        if (cpu >= 0) {
-               slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+               slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
-                       slots->pinned += max_task_bp_pinned(cpu);
+                       slots->pinned += max_task_bp_pinned(cpu, type);
                else
-                       slots->pinned += task_bp_pinned(tsk);
-               slots->flexible = per_cpu(nr_bp_flexible, cpu);
+                       slots->pinned += task_bp_pinned(tsk, type);
+               slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
                return;
        }
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
        for_each_online_cpu(cpu) {
                unsigned int nr;
 
-               nr = per_cpu(nr_cpu_bp_pinned, cpu);
+               nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
-                       nr += max_task_bp_pinned(cpu);
+                       nr += max_task_bp_pinned(cpu, type);
                else
-                       nr += task_bp_pinned(tsk);
+                       nr += task_bp_pinned(tsk, type);
 
                if (nr > slots->pinned)
                        slots->pinned = nr;
 
-               nr = per_cpu(nr_bp_flexible, cpu);
+               nr = per_cpu(nr_bp_flexible[type], cpu);
 
                if (nr > slots->flexible)
                        slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 }
 
 /*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * on the same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+       slots->pinned += weight;
+}
+
+/*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+                               enum bp_type_idx type, int weight)
 {
        unsigned int *tsk_pinned;
-       int count = 0;
+       int old_count = 0;
+       int old_idx = 0;
+       int idx = 0;
 
-       count = task_bp_pinned(tsk);
+       old_count = task_bp_pinned(tsk, type);
+       old_idx = old_count - 1;
+       idx = old_idx + weight;
 
-       tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+       tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
        if (enable) {
-               tsk_pinned[count]++;
-               if (count > 0)
-                       tsk_pinned[count-1]--;
+               tsk_pinned[idx]++;
+               if (old_count > 0)
+                       tsk_pinned[old_idx]--;
        } else {
-               tsk_pinned[count]--;
-               if (count > 0)
-                       tsk_pinned[count-1]++;
+               tsk_pinned[idx]--;
+               if (old_count > 0)
+                       tsk_pinned[old_idx]++;
        }
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+              int weight)
 {
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
        /* Pinned counter task profiling */
        if (tsk) {
                if (cpu >= 0) {
-                       toggle_bp_task_slot(tsk, cpu, enable);
+                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
                        return;
                }
 
                for_each_online_cpu(cpu)
-                       toggle_bp_task_slot(tsk, cpu, enable);
+                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
                return;
        }
 
        /* Pinned counter cpu profiling */
        if (enable)
-               per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
        else
-               per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
        struct bp_busy_slots slots = {0};
+       enum bp_type_idx type;
+       int weight;
 
-       fetch_bp_busy_slots(&slots, bp);
+       /* We couldn't initialize breakpoint constraints on boot */
+       if (!constraints_initialized)
+               return -ENOMEM;
+
+       /* Basic checks */
+       if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+           bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+               return -EINVAL;
+
+       type = find_slot_idx(bp);
+       weight = hw_breakpoint_weight(bp);
+
+       fetch_bp_busy_slots(&slots, bp, type);
+       fetch_this_slot(&slots, weight);
 
        /* Flexible counters need to keep at least one slot */
-       if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+       if (slots.pinned + (!!slots.flexible) > nr_slots[type])
                return -ENOSPC;
 
-       toggle_bp_slot(bp, true);
+       toggle_bp_slot(bp, true, type, weight);
 
        return 0;
 }
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-       toggle_bp_slot(bp, false);
+       enum bp_type_idx type;
+       int weight;
+
+       type = find_slot_idx(bp);
+       weight = hw_breakpoint_weight(bp);
+       toggle_bp_slot(bp, false, type, weight);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
        return 0;
 }
 
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+       int ret;
+
+       ret = arch_validate_hwbkpt_settings(bp);
+       if (ret)
+               return ret;
+
+       if (arch_check_bp_in_kernelspace(bp)) {
+               if (bp->attr.exclude_kernel)
+                       return -EINVAL;
+               /*
+                * Don't let unprivileged users set a breakpoint in the trap
+                * path to avoid trap recursion attacks.
+                */
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+       }
+
+       return 0;
+}
+
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
        int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
        if (ret)
                return ret;
 
-       /*
-        * Ptrace breakpoints can be temporary perf events only
-        * meant to reserve a slot. In this case, it is created disabled and
-        * we don't want to check the params right now (as we put a null addr)
-        * But perf tools create events as disabled and we want to check
-        * the params for them.
-        * This is a quick hack that will be removed soon, once we remove
-        * the tmp breakpoints from ptrace
-        */
-       if (!bp->attr.disabled || !bp->overflow_handler)
-               ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+       ret = validate_hw_breakpoint(bp);
 
 /* if validate_hw_breakpoint() fails then release bp slot */
        if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
        if (attr->disabled)
                goto end;
 
-       err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+       err = validate_hw_breakpoint(bp);
        if (!err)
                perf_event_enable(bp);
 
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+       unsigned int **task_bp_pinned;
+       int cpu, err_cpu;
+       int i;
+
+       for (i = 0; i < TYPE_MAX; i++)
+               nr_slots[i] = hw_breakpoint_slots(i);
+
+       for_each_possible_cpu(cpu) {
+               for (i = 0; i < TYPE_MAX; i++) {
+                       task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+                       *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+                                                 GFP_KERNEL);
+                       if (!*task_bp_pinned)
+                               goto err_alloc;
+               }
+       }
+
+       constraints_initialized = 1;
+
        return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+       for_each_possible_cpu(err_cpu) {
+               if (err_cpu == cpu)
+                       break;
+               for (i = 0; i < TYPE_MAX; i++)
+                       kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
+       }
+
+       return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
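
hw_breakpoint_weight() is deliberately __weak with a default of 1, which
keeps the old one-breakpoint-one-slot accounting; an architecture on which
a single breakpoint can consume several registers can override it so that
__reserve_bp_slot() debits the right number of slots. A hypothetical
override (no arch in this merge does this):

/* Hypothetical arch override, for illustration only: report that a
 * wide breakpoint occupies two hardware slots. */
int hw_breakpoint_weight(struct perf_event *bp)
{
	return bp->attr.bp_len == HW_BREAKPOINT_LEN_8 ? 2 : 1;
}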
 
index d59cd68..8eaf007 100644
 
 #include <asm/atomic.h>
 
-/*
- * For now, let us restrict the no. of symbols traced simultaneously to number
- * of available hardware breakpoint registers.
- */
-#define KSYM_TRACER_MAX HBP_NUM
-
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
@@ -53,7 +47,6 @@ struct trace_ksym {
 
 static struct trace_array *ksym_trace_array;
 
-static unsigned int ksym_filter_entry_count;
 static unsigned int ksym_tracing_enabled;
 
 static HLIST_HEAD(ksym_filter_head);
@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
        struct trace_ksym *entry;
        int ret = -ENOMEM;
 
-       if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
-               printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
-               " new requests for tracing can be accepted now.\n",
-                       KSYM_TRACER_MAX);
-               return -ENOSPC;
-       }
-
        entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;
@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 
        if (IS_ERR(entry->ksym_hbp)) {
                ret = PTR_ERR(entry->ksym_hbp);
-               printk(KERN_INFO "ksym_tracer request failed. Try again"
-                                       " later!!\n");
+               if (ret == -ENOSPC) {
+                       printk(KERN_ERR "ksym_tracer: Maximum limit reached."
+                       " No new requests for tracing can be accepted now.\n");
+               } else {
+                       printk(KERN_INFO "ksym_tracer request failed. Try again"
+                                        " later!!\n");
+               }
                goto err;
        }
 
        hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
-       ksym_filter_entry_count++;
 
        return 0;
 
@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
        hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
                                                                ksym_hlist) {
                unregister_wide_hw_breakpoint(entry->ksym_hbp);
-               ksym_filter_entry_count--;
                hlist_del_rcu(&(entry->ksym_hlist));
                synchronize_rcu();
                kfree(entry);
@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
                                goto out_unlock;
                }
                /* Error or "symbol:---" case: drop it */
-               ksym_filter_entry_count--;
                hlist_del_rcu(&(entry->ksym_hlist));
                synchronize_rcu();
                kfree(entry);
index d729cee..ee6525e 100644
@@ -49,12 +49,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
index a241aca..16a8650 100644
@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only
 the sys_enter events:
 
 ----
-# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+# perf record -a -e raw_syscalls:sys_enter
 
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
@@ -359,7 +359,7 @@ your script:
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
 
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+perf record -a -e raw_syscalls:sys_enter
 ----
 
 The 'report' script is also a shell script with the same base name as
@@ -449,12 +449,10 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+ # perf record -a -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above options: -c 1 says to sample every event, -a to enable
-system-wide collection, -M to multiplex the output, and -R to collect
-raw samples.
+the above option: -a to enable system-wide collection.
 
The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
index e6cb147..423ad6a 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
+perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
index 6ad9b8f..eb5846b 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@
index a828679..5bfaae5 100644
@@ -1,3 +1,3 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
 
index 63976bf..6e0b2f7 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
index 63976bf..6e0b2f7 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
index 9c0cf58..9f2acaa 100644
@@ -1,5 +1,5 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@
+perf record -a -e sched:sched_switch -e sched:sched_wakeup $@
 
 
 
index c2a1a94..85301f2 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
+perf record -a -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
index 6ad9b8f..eb5846b 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
+perf record -a -e raw_syscalls:sys_exit $@
index 27ccffa..1fc5998 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
index 27ccffa..1fc5998 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
index 27ccffa..1fc5998 100644
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
+perf record -a -e raw_syscalls:sys_enter $@
index d6ef414..069f261 100644
@@ -691,11 +691,6 @@ static int __read_expected(enum event_type expect, const char *str,
        return ret;
 }
 
-static int read_expected_warn(enum event_type expect, const char *str, bool warn)
-{
-       return __read_expected(expect, str, 1, warn);
-}
-
 static int read_expected(enum event_type expect, const char *str)
 {
        return __read_expected(expect, str, 1, true);
@@ -3104,90 +3099,6 @@ static void print_args(struct print_arg *args)
        }
 }
 
-static void parse_header_field(const char *field,
-                              int *offset, int *size, bool warn)
-{
-       char *token;
-       int type;
-
-       if (read_expected(EVENT_ITEM, "field") < 0)
-               return;
-       if (read_expected(EVENT_OP, ":") < 0)
-               return;
-
-       /* type */
-       if (read_expect_type(EVENT_ITEM, &token) < 0)
-               goto fail;
-       free_token(token);
-
-       if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
-               return;
-       if (read_expected(EVENT_OP, ";") < 0)
-               return;
-       if (read_expected(EVENT_ITEM, "offset") < 0)
-               return;
-       if (read_expected(EVENT_OP, ":") < 0)
-               return;
-       if (read_expect_type(EVENT_ITEM, &token) < 0)
-               goto fail;
-       *offset = atoi(token);
-       free_token(token);
-       if (read_expected(EVENT_OP, ";") < 0)
-               return;
-       if (read_expected(EVENT_ITEM, "size") < 0)
-               return;
-       if (read_expected(EVENT_OP, ":") < 0)
-               return;
-       if (read_expect_type(EVENT_ITEM, &token) < 0)
-               goto fail;
-       *size = atoi(token);
-       free_token(token);
-       if (read_expected(EVENT_OP, ";") < 0)
-               return;
-       type = read_token(&token);
-       if (type != EVENT_NEWLINE) {
-               /* newer versions of the kernel have a "signed" type */
-               if (type != EVENT_ITEM)
-                       goto fail;
-
-               if (strcmp(token, "signed") != 0)
-                       goto fail;
-
-               free_token(token);
-
-               if (read_expected(EVENT_OP, ":") < 0)
-                       return;
-
-               if (read_expect_type(EVENT_ITEM, &token))
-                       goto fail;
-
-               free_token(token);
-               if (read_expected(EVENT_OP, ";") < 0)
-                       return;
-
-               if (read_expect_type(EVENT_NEWLINE, &token))
-                       goto fail;
-       }
- fail:
-       free_token(token);
-}
-
-int parse_header_page(char *buf, unsigned long size)
-{
-       init_input_buf(buf, size);
-
-       parse_header_field("timestamp", &header_page_ts_offset,
-                          &header_page_ts_size, true);
-       parse_header_field("commit", &header_page_size_offset,
-                          &header_page_size_size, true);
-       parse_header_field("overwrite", &header_page_overwrite_offset,
-                          &header_page_overwrite_size, false);
-       parse_header_field("data", &header_page_data_offset,
-                          &header_page_data_size, true);
-
-       return 0;
-}
-
 int parse_ftrace_file(char *buf, unsigned long size)
 {
        struct format_field *field;
index 43f19c1..cb54cd0 100644
@@ -53,6 +53,12 @@ static unsigned long page_size;
 static ssize_t calc_data_size;
 static bool repipe;
 
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+       lseek(input_fd, size, SEEK_CUR);
+}
+
 static int do_read(int fd, void *buf, int size)
 {
        int rsize = size;
@@ -184,7 +190,6 @@ static void read_ftrace_printk(void)
 static void read_header_files(void)
 {
        unsigned long long size;
-       char *header_page;
        char *header_event;
        char buf[BUFSIZ];
 
@@ -194,10 +199,7 @@ static void read_header_files(void)
                die("did not read header page");
 
        size = read8();
-       header_page = malloc_or_die(size);
-       read_or_die(header_page, size);
-       parse_header_page(header_page, size);
-       free(header_page);
+       skip(size);
 
        /*
         * The size field in the page is of type long,
index ebfee80..406d452 100644
@@ -244,7 +244,6 @@ extern int header_page_data_size;
 
 extern bool latency_format;
 
-int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
 int trace_parse_common_pid(void *data);
 int parse_common_pc(void *data);