Merge branches 'perf/powerpc' and 'perf/bench' into perf/core
authorIngo Molnar <mingo@elte.hu>
Sun, 15 Nov 2009 08:51:19 +0000 (09:51 +0100)
committerIngo Molnar <mingo@elte.hu>
Sun, 15 Nov 2009 08:51:24 +0000 (09:51 +0100)
Merge reason: Both 'perf bench' and the pending PowerPC changes
              are now ready for the next merge window.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
37 files changed:
arch/powerpc/Kconfig.debug
arch/powerpc/configs/pseries_defconfig
arch/powerpc/include/asm/emulated_ops.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/trace.h [new file with mode: 0644]
arch/powerpc/kernel/align.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
arch/powerpc/kernel/power6-pmu.c
arch/powerpc/kernel/power7-pmu.c
arch/powerpc/kernel/ppc970-pmu.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/traps.c
arch/powerpc/lib/copypage_64.S
arch/powerpc/platforms/pseries/hvCall.S
arch/powerpc/platforms/pseries/hvCall_inst.c
arch/powerpc/platforms/pseries/lpar.c
include/linux/perf_counter.h
include/linux/perf_event.h
kernel/perf_event.c
tools/perf/Documentation/perf-bench.txt [new file with mode: 0644]
tools/perf/Makefile
tools/perf/bench/bench.h [new file with mode: 0644]
tools/perf/bench/sched-messaging.c [new file with mode: 0644]
tools/perf/bench/sched-pipe.c [new file with mode: 0644]
tools/perf/builtin-bench.c [new file with mode: 0644]
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/design.txt
tools/perf/perf.c
tools/perf/util/parse-events.c

index 3b10051..bf3382f 100644 (file)
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
 
 config HCALL_STATS
        bool "Hypervisor call instrumentation"
-       depends on PPC_PSERIES && DEBUG_FS
+       depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
        help
          Adds code to keep track of the number of hypervisor calls made and
          the amount of time spent in hypervisor calls.  Wall time spent in
index f1889ab..c568329 100644 (file)
@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_HCALL_STATS=y
+# CONFIG_HCALL_STATS is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_FTR_FIXUP_SELFTEST is not set
 # CONFIG_MSI_BITMAP_SELFTEST is not set
index 9154e85..f0fb4fc 100644 (file)
@@ -19,6 +19,7 @@
 #define _ASM_POWERPC_EMULATED_OPS_H
 
 #include <asm/atomic.h>
+#include <linux/perf_event.h>
 
 
 #ifdef CONFIG_PPC_EMULATED_STATS
@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
 
 extern void ppc_warn_emulated_print(const char *type);
 
-#define PPC_WARN_EMULATED(type)                                                 \
+#define __PPC_WARN_EMULATED(type)                                       \
        do {                                                             \
                atomic_inc(&ppc_emulated.type.val);                      \
                if (ppc_warn_emulated)                                   \
@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
 
 #else /* !CONFIG_PPC_EMULATED_STATS */
 
-#define PPC_WARN_EMULATED(type)        do { } while (0)
+#define __PPC_WARN_EMULATED(type)      do { } while (0)
 
 #endif /* !CONFIG_PPC_EMULATED_STATS */
 
+#define PPC_WARN_EMULATED(type, regs)                                  \
+       do {                                                            \
+               perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,           \
+                       1, 0, regs, 0);                                 \
+               __PPC_WARN_EMULATED(type);                              \
+       } while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs)                                 \
+       do {    /* perf alignment-fault event, then emulation stat */   \
+               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,           \
+                       1, 0, regs, (regs)->dar);                       \
+               __PPC_WARN_EMULATED(type);                              \
+       } while (0)
+
 #endif /* _ASM_POWERPC_EMULATED_OPS_H */
index 6251a4b..c27caac 100644 (file)
@@ -274,6 +274,8 @@ struct hcall_stats {
        unsigned long   num_calls;      /* number of calls (on this CPU) */
        unsigned long   tb_total;       /* total wall time (mftb) of calls. */
        unsigned long   purr_total;     /* total cpu time (PURR) of calls. */
+       unsigned long   tb_start;
+       unsigned long   purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE  ((MAX_HCALL_OPCODE >> 2) + 1)
 
index 6315edc..bc8dd53 100644 (file)
 #define SPRN_MMCR1     798
 #define SPRN_MMCRA     0x312
 #define   MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
+#define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+#define   MMCRA_SDAR_ERAT_MISS   0x20000000UL
 #define   MMCRA_SIHV   0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR   0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT   0x07000000UL /* SLOT bits (37-39) */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
new file mode 100644 (file)
index 0000000..cbe2297
--- /dev/null
@@ -0,0 +1,133 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include <linux/tracepoint.h>
+
+struct pt_regs;
+
+TRACE_EVENT(irq_entry,
+
+       TP_PROTO(struct pt_regs *regs),
+
+       TP_ARGS(regs),
+
+       TP_STRUCT__entry(
+               __field(struct pt_regs *, regs)
+       ),
+
+       TP_fast_assign(
+               __entry->regs = regs;
+       ),
+
+       TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(irq_exit,
+
+       TP_PROTO(struct pt_regs *regs),
+
+       TP_ARGS(regs),
+
+       TP_STRUCT__entry(
+               __field(struct pt_regs *, regs)
+       ),
+
+       TP_fast_assign(
+               __entry->regs = regs;
+       ),
+
+       TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_entry,
+
+       TP_PROTO(struct pt_regs *regs),
+
+       TP_ARGS(regs),
+
+       TP_STRUCT__entry(
+               __field(struct pt_regs *, regs)
+       ),
+
+       TP_fast_assign(
+               __entry->regs = regs;
+       ),
+
+       TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_exit,
+
+       TP_PROTO(struct pt_regs *regs),
+
+       TP_ARGS(regs),
+
+       TP_STRUCT__entry(
+               __field(struct pt_regs *, regs)
+       ),
+
+       TP_fast_assign(
+               __entry->regs = regs;
+       ),
+
+       TP_printk("pt_regs=%p", __entry->regs)
+);
+
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+       TP_PROTO(unsigned long opcode, unsigned long *args),
+
+       TP_ARGS(opcode, args),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, opcode)
+       ),
+
+       TP_fast_assign(
+               __entry->opcode = opcode;
+       ),
+
+       TP_printk("opcode=%lu", __entry->opcode),
+
+       hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+       TP_PROTO(unsigned long opcode, unsigned long retval,
+               unsigned long *retbuf),
+
+       TP_ARGS(opcode, retval, retbuf),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, opcode)
+               __field(unsigned long, retval)
+       ),
+
+       TP_fast_assign(
+               __entry->opcode = opcode;
+               __entry->retval = retval;
+       ),
+
+       TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+       hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
index a5b632e..3839839 100644 (file)
@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
 
 #ifdef CONFIG_SPE
        if ((instr >> 26) == 0x4) {
-               PPC_WARN_EMULATED(spe);
+               PPC_WARN_ALIGNMENT(spe, regs);
                return emulate_spe(regs, reg, instr);
        }
 #endif
@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
                        flags |= SPLT;
                        nb = 8;
                }
-               PPC_WARN_EMULATED(vsx);
+               PPC_WARN_ALIGNMENT(vsx, regs);
                return emulate_vsx(addr, reg, areg, regs, flags, nb);
        }
 #endif
@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
         * the exception of DCBZ which is handled as a special case here
         */
        if (instr == DCBZ) {
-               PPC_WARN_EMULATED(dcbz);
+               PPC_WARN_ALIGNMENT(dcbz, regs);
                return emulate_dcbz(regs, addr);
        }
        if (unlikely(nb == 0))
@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
         * function
         */
        if (flags & M) {
-               PPC_WARN_EMULATED(multiple);
+               PPC_WARN_ALIGNMENT(multiple, regs);
                return emulate_multiple(regs, addr, reg, nb,
                                        flags, instr, swiz);
        }
@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
 
        /* Special case for 16-byte FP loads and stores */
        if (nb == 16) {
-               PPC_WARN_EMULATED(fp_pair);
+               PPC_WARN_ALIGNMENT(fp_pair, regs);
                return emulate_fp_pair(addr, reg, flags);
        }
 
-       PPC_WARN_EMULATED(unaligned);
+       PPC_WARN_ALIGNMENT(unaligned, regs);
 
        /* If we are loading, get the data from user space, else
         * get it from register values
index 9763267..bdcb557 100644 (file)
@@ -551,7 +551,7 @@ restore:
 BEGIN_FW_FTR_SECTION
        ld      r5,SOFTE(r1)
 FW_FTR_SECTION_ELSE
-       b       iseries_check_pending_irqs
+       b       .Liseries_check_pending_irqs
 ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
        TRACE_AND_RESTORE_IRQ(r5);
@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 
 #endif /* CONFIG_PPC_BOOK3E */
 
-iseries_check_pending_irqs:
+.Liseries_check_pending_irqs:
 #ifdef CONFIG_PPC_ISERIES
        ld      r5,SOFTE(r1)
        cmpdi   0,r5,0
index 1808876..c7eb4e0 100644 (file)
@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
         * prolog code of the PerformanceMonitor one. A little
         * trickery is thus necessary
         */
+performance_monitor_pSeries_1:
        . = 0xf00
        b       performance_monitor_pSeries
 
+altivec_unavailable_pSeries_1:
        . = 0xf20
        b       altivec_unavailable_pSeries
 
+vsx_unavailable_pSeries_1:
        . = 0xf40
        b       vsx_unavailable_pSeries
 
index e5d1211..02a3346 100644 (file)
@@ -70,6 +70,8 @@
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
 #endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
 
 int __irq_offset_value;
 static int ppc_spurious_interrupts;
@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
        struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned int irq;
 
+       trace_irq_entry(regs);
+
        irq_enter();
 
        check_stack_overflow();
@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
                timer_interrupt(regs);
        }
 #endif
+
+       trace_irq_exit(regs);
 }
 
 void __init init_IRQ(void)
index 87f1663..1eb85fb 100644 (file)
@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
         */
        if (record) {
                struct perf_sample_data data = {
-                       .addr   = 0,
+                       .addr   = ~0ULL,
                        .period = event->hw.last_period,
                };
 
index 0f4c1c7..199de52 100644 (file)
 #define MMCR1_PMCSEL_MSK       0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
index c351b3a..98b6a72 100644 (file)
 #define MMCR1_PMCSEL_MSK       0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
                               unsigned int hwc[], unsigned long mmcr[])
 {
        unsigned long mmcr1 = 0;
-       unsigned long mmcra = 0;
+       unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
        unsigned int pmc, unit, byte, psel;
        unsigned int ttm, grp;
        int i, isbus, bit, grsel;
index ca399ba..84a607b 100644 (file)
@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
                           unsigned int hwc[], unsigned long mmcr[])
 {
        unsigned long mmcr1 = 0;
-       unsigned long mmcra = 0;
+       unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
        int i;
        unsigned int pmc, ev, b, u, s, psel;
        unsigned int ttmset = 0;
index 28a4daa..852f7b7 100644 (file)
 #define MMCR1_PMCSEL_MSK       0xff
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
                               unsigned int hwc[], unsigned long mmcr[])
 {
        unsigned long mmcr1 = 0;
-       unsigned long mmcra = 0;
+       unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
        unsigned int pmc, unit, combine, l2sel, psel;
        unsigned int pmc_inuse = 0;
        int i;
index 4795744..8eff48e 100644 (file)
@@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = {
 };
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
index 4271f7a..845c72a 100644 (file)
@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
 
 static int powerpc_debugfs_init(void)
 {
index a136a11..36707de 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/perf_event.h>
+#include <asm/trace.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
        struct clock_event_device *evt = &decrementer->event;
        u64 now;
 
+       trace_timer_interrupt_entry(regs);
+
        /* Ensure a positive value is written to the decrementer, or else
         * some CPUs will continuue to take decrementer exceptions */
        set_dec(DECREMENTER_MAX);
@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
                now = decrementer->next_tb - now;
                if (now <= DECREMENTER_MAX)
                        set_dec((int)now);
+               trace_timer_interrupt_exit(regs);
                return;
        }
        old_regs = set_irq_regs(regs);
@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
 
        irq_exit();
        set_irq_regs(old_regs);
+
+       trace_timer_interrupt_exit(regs);
 }
 
 void wakeup_decrementer(void)
index 6f0ae1a..9d1f935 100644 (file)
@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
 
        /* Emulate the mfspr rD, PVR. */
        if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
-               PPC_WARN_EMULATED(mfpvr);
+               PPC_WARN_EMULATED(mfpvr, regs);
                rd = (instword >> 21) & 0x1f;
                regs->gpr[rd] = mfspr(SPRN_PVR);
                return 0;
@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
 
        /* Emulating the dcba insn is just a no-op.  */
        if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
-               PPC_WARN_EMULATED(dcba);
+               PPC_WARN_EMULATED(dcba, regs);
                return 0;
        }
 
@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
                int shift = (instword >> 21) & 0x1c;
                unsigned long msk = 0xf0000000UL >> shift;
 
-               PPC_WARN_EMULATED(mcrxr);
+               PPC_WARN_EMULATED(mcrxr, regs);
                regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
                regs->xer &= ~0xf0000000UL;
                return 0;
@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
 
        /* Emulate load/store string insn. */
        if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
-               PPC_WARN_EMULATED(string);
+               PPC_WARN_EMULATED(string, regs);
                return emulate_string_inst(regs, instword);
        }
 
        /* Emulate the popcntb (Population Count Bytes) instruction. */
        if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
-               PPC_WARN_EMULATED(popcntb);
+               PPC_WARN_EMULATED(popcntb, regs);
                return emulate_popcntb_inst(regs, instword);
        }
 
        /* Emulate isel (Integer Select) instruction */
        if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
-               PPC_WARN_EMULATED(isel);
+               PPC_WARN_EMULATED(isel, regs);
                return emulate_isel(regs, instword);
        }
 
@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
 #ifdef CONFIG_MATH_EMULATION
        errcode = do_mathemu(regs);
        if (errcode >= 0)
-               PPC_WARN_EMULATED(math);
+               PPC_WARN_EMULATED(math, regs);
 
        switch (errcode) {
        case 0:
@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
 #elif defined(CONFIG_8XX_MINIMAL_FPEMU)
        errcode = Soft_emulate_8xx(regs);
        if (errcode >= 0)
-               PPC_WARN_EMULATED(8xx);
+               PPC_WARN_EMULATED(8xx, regs);
 
        switch (errcode) {
        case 0:
@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
 
        flush_altivec_to_thread(current);
 
-       PPC_WARN_EMULATED(altivec);
+       PPC_WARN_EMULATED(altivec, regs);
        err = emulate_altivec(regs);
        if (err == 0) {
                regs->nip += 4;         /* skip emulated instruction */
index 75f3267..e68beac 100644 (file)
@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
        srd     r8,r5,r11
 
        mtctr   r8
-setup:
+.Lsetup:
        dcbt    r9,r4
        dcbz    r9,r3
        add     r9,r9,r12
-       bdnz    setup
+       bdnz    .Lsetup
 END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
        addi    r3,r3,-8
        srdi    r8,r5,7         /* page is copied in 128 byte strides */
index c1427b3..383a5d0 100644 (file)
        
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+       .section        ".toc","aw"
+
+       .globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+       .llong  0
+
+       .section        ".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
-#define HCALL_INST_PRECALL                                     \
-       std     r3,STK_PARM(r3)(r1);    /* save opcode */       \
-       mftb    r0;                     /* get timebase and */  \
-       std     r0,STK_PARM(r5)(r1);    /* save for later */    \
+#define HCALL_INST_PRECALL(FIRST_REG)                          \
 BEGIN_FTR_SECTION;                                             \
-       mfspr   r0,SPRN_PURR;           /* get PURR and */      \
-       std     r0,STK_PARM(r6)(r1);    /* save for later */    \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-       
+       b       1f;                                             \
+END_FTR_SECTION(0, 1);                                         \
+       ld      r12,hcall_tracepoint_refcount@toc(r2);          \
+       cmpdi   r12,0;                                          \
+       beq+    1f;                                             \
+       mflr    r0;                                             \
+       std     r3,STK_PARM(r3)(r1);                            \
+       std     r4,STK_PARM(r4)(r1);                            \
+       std     r5,STK_PARM(r5)(r1);                            \
+       std     r6,STK_PARM(r6)(r1);                            \
+       std     r7,STK_PARM(r7)(r1);                            \
+       std     r8,STK_PARM(r8)(r1);                            \
+       std     r9,STK_PARM(r9)(r1);                            \
+       std     r10,STK_PARM(r10)(r1);                          \
+       std     r0,16(r1);                                      \
+       addi    r4,r1,STK_PARM(FIRST_REG);                      \
+       stdu    r1,-STACK_FRAME_OVERHEAD(r1);                   \
+       bl      .__trace_hcall_entry;                           \
+       addi    r1,r1,STACK_FRAME_OVERHEAD;                     \
+       ld      r0,16(r1);                                      \
+       ld      r3,STK_PARM(r3)(r1);                            \
+       ld      r4,STK_PARM(r4)(r1);                            \
+       ld      r5,STK_PARM(r5)(r1);                            \
+       ld      r6,STK_PARM(r6)(r1);                            \
+       ld      r7,STK_PARM(r7)(r1);                            \
+       ld      r8,STK_PARM(r8)(r1);                            \
+       ld      r9,STK_PARM(r9)(r1);                            \
+       ld      r10,STK_PARM(r10)(r1);                          \
+       mtlr    r0;                                             \
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_POSTCALL                                    \
+#define __HCALL_INST_POSTCALL                                  \
 BEGIN_FTR_SECTION;                                             \
        b       1f;                                             \
 END_FTR_SECTION(0, 1);                                         \
-       ld      r4,STK_PARM(r3)(r1);    /* validate opcode */   \
-       cmpldi  cr7,r4,MAX_HCALL_OPCODE;                        \
-       bgt-    cr7,1f;                                         \
-                                                               \
-       /* get time and PURR snapshots after hcall */           \
-       mftb    r7;                     /* timebase after */    \
-BEGIN_FTR_SECTION;                                             \
-       mfspr   r8,SPRN_PURR;           /* PURR after */        \
-       ld      r6,STK_PARM(r6)(r1);    /* PURR before */       \
-       subf    r6,r6,r8;               /* delta */             \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
-       ld      r5,STK_PARM(r5)(r1);    /* timebase before */   \
-       subf    r5,r5,r7;               /* time delta */        \
-                                                               \
-       /* calculate address of stat structure r4 = opcode */   \
-       srdi    r4,r4,2;                /* index into array */  \
-       mulli   r4,r4,HCALL_STAT_SIZE;                          \
-       LOAD_REG_ADDR(r7, per_cpu__hcall_stats);                \
-       add     r4,r4,r7;                                       \
-       ld      r7,PACA_DATA_OFFSET(r13); /* per cpu offset */  \
-       add     r4,r4,r7;                                       \
-                                                               \
-       /* update stats */                                      \
-       ld      r7,HCALL_STAT_CALLS(r4); /* count */            \
-       addi    r7,r7,1;                                        \
-       std     r7,HCALL_STAT_CALLS(r4);                        \
-       ld      r7,HCALL_STAT_TB(r4);   /* timebase */          \
-       add     r7,r7,r5;                                       \
-       std     r7,HCALL_STAT_TB(r4);                           \
-BEGIN_FTR_SECTION;                                             \
-       ld      r7,HCALL_STAT_PURR(r4); /* PURR */              \
-       add     r7,r7,r6;                                       \
-       std     r7,HCALL_STAT_PURR(r4);                         \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
+       ld      r12,hcall_tracepoint_refcount@toc(r2);          \
+       cmpdi   r12,0;                                          \
+       beq+    1f;                                             \
+       mflr    r0;                                             \
+       ld      r6,STK_PARM(r3)(r1);                            \
+       std     r3,STK_PARM(r3)(r1);                            \
+       mr      r4,r3;                                          \
+       mr      r3,r6;                                          \
+       std     r0,16(r1);                                      \
+       stdu    r1,-STACK_FRAME_OVERHEAD(r1);                   \
+       bl      .__trace_hcall_exit;                            \
+       addi    r1,r1,STACK_FRAME_OVERHEAD;                     \
+       ld      r0,16(r1);                                      \
+       ld      r3,STK_PARM(r3)(r1);                            \
+       mtlr    r0;                                             \
 1:
+
+#define HCALL_INST_POSTCALL_NORETS                             \
+       li      r5,0;                                           \
+       __HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)                            \
+       mr      r5,BUFREG;                                      \
+       __HCALL_INST_POSTCALL
+
 #else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
 #endif
 
        .text
@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
        mfcr    r0
        stw     r0,8(r1)
 
-       HCALL_INST_PRECALL
+       HCALL_INST_PRECALL(r4)
 
        HVSC                            /* invoke the hypervisor */
 
-       HCALL_INST_POSTCALL
+       HCALL_INST_POSTCALL_NORETS
 
        lwz     r0,8(r1)
        mtcrf   0xff,r0
@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
        mfcr    r0
        stw     r0,8(r1)
 
-       HCALL_INST_PRECALL
+       HCALL_INST_PRECALL(r5)
 
        std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
        std     r6, 16(r12)
        std     r7, 24(r12)
 
-       HCALL_INST_POSTCALL
+       HCALL_INST_POSTCALL(r12)
 
        lwz     r0,8(r1)
        mtcrf   0xff,r0
@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
        mfcr    r0
        stw     r0,8(r1)
 
-       HCALL_INST_PRECALL
+       HCALL_INST_PRECALL(r5)
 
        std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
        std     r11,56(r12)
        std     r0, 64(r12)
 
-       HCALL_INST_POSTCALL
+       HCALL_INST_POSTCALL(r12)
 
        lwz     r0,8(r1)
        mtcrf   0xff,r0
index 3631a4f..2f58c71 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define        HCALL_ROOT_DIR          "hcall_inst"
 #define CPU_NAME_BUF_SIZE      32
 
+
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+       struct hcall_stats *h;
+
+       if (opcode > MAX_HCALL_OPCODE)  /* no hcall_stats slot for this opcode */
+               return;
+
+       h = &get_cpu_var(hcall_stats)[opcode / 4];      /* put_cpu_var() is in probe_hcall_exit() */
+       h->tb_start = mftb();           /* entry snapshot, consumed by probe_hcall_exit() */
+       h->purr_start = mfspr(SPRN_PURR);       /* entry snapshot, consumed by probe_hcall_exit() */
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+                            unsigned long *retbuf)
+{
+       struct hcall_stats *h;
+
+       if (opcode > MAX_HCALL_OPCODE)  /* mirrors the early return in probe_hcall_entry() */
+               return;
+
+       h = &__get_cpu_var(hcall_stats)[opcode / 4];
+       h->num_calls++;
+       h->tb_total += mftb() - h->tb_start;    /* accumulate; '=' kept only the last call */
+       h->purr_total += mfspr(SPRN_PURR) - h->purr_start;      /* accumulate, as above */
+
+       put_cpu_var(hcall_stats);       /* pairs with get_cpu_var() in probe_hcall_entry() */
+}
+
 static int __init hcall_inst_init(void)
 {
        struct dentry *hcall_root;
@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                return 0;
 
+       if (register_trace_hcall_entry(probe_hcall_entry))
+               return -EINVAL;
+
+       if (register_trace_hcall_exit(probe_hcall_exit)) {
+               unregister_trace_hcall_entry(probe_hcall_entry);
+               return -EINVAL;
+       }
+
        hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
        if (!hcall_root)
                return -ENOMEM;
index 903eb9e..0707653 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+       hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+       hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+       trace_hcall_entry(opcode, args);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval,
+                       unsigned long *retbuf)
+{
+       trace_hcall_exit(opcode, retval, retbuf);
+}
+#endif
index 91a2b43..e3fb256 100644 (file)
@@ -106,6 +106,8 @@ enum perf_sw_ids {
        PERF_COUNT_SW_CPU_MIGRATIONS            = 4,
        PERF_COUNT_SW_PAGE_FAULTS_MIN           = 5,
        PERF_COUNT_SW_PAGE_FAULTS_MAJ           = 6,
+       PERF_COUNT_SW_ALIGNMENT_FAULTS          = 7,
+       PERF_COUNT_SW_EMULATION_FAULTS          = 8,
 
        PERF_COUNT_SW_MAX,                      /* non-ABI */
 };
index ec3768a..df4e73e 100644 (file)
@@ -102,6 +102,8 @@ enum perf_sw_ids {
        PERF_COUNT_SW_CPU_MIGRATIONS            = 4,
        PERF_COUNT_SW_PAGE_FAULTS_MIN           = 5,
        PERF_COUNT_SW_PAGE_FAULTS_MAJ           = 6,
+       PERF_COUNT_SW_ALIGNMENT_FAULTS          = 7,
+       PERF_COUNT_SW_EMULATION_FAULTS          = 8,
 
        PERF_COUNT_SW_MAX,                      /* non-ABI */
 };
index a69d4ed..6f4ed3b 100644 (file)
@@ -4274,6 +4274,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
        case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
        case PERF_COUNT_SW_CONTEXT_SWITCHES:
        case PERF_COUNT_SW_CPU_MIGRATIONS:
+       case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+       case PERF_COUNT_SW_EMULATION_FAULTS:
                if (!event->parent) {
                        atomic_inc(&perf_swevent_enabled[event_id]);
                        event->destroy = sw_perf_event_destroy;
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
new file mode 100644 (file)
index 0000000..ae525ac
--- /dev/null
@@ -0,0 +1,120 @@
+perf-bench(1)
+=============
+
+NAME
+----
+perf-bench - General framework for benchmark suites
+
+SYNOPSIS
+--------
+[verse]
+'perf bench' [<common options>] <subsystem> <suite> [<options>]
+
+DESCRIPTION
+-----------
+This 'perf bench' command is a general framework for benchmark suites.
+
+COMMON OPTIONS
+--------------
+-f::
+--format=::
+Specify format style.
+Currently available format styles are:
+
+'default'::
+Default style. This is mainly for human reading.
+---------------------
+% perf bench sched pipe                      # with no style specified
+(executing 1000000 pipe operations between two tasks)
+        Total time:5.855 sec
+                5.855061 usecs/op
+               170792 ops/sec
+---------------------
+
+'simple'::
+This simple style is friendly for automated
+processing by scripts.
+---------------------
+% perf bench --format=simple sched pipe      # specified simple
+5.988
+---------------------
+
+SUBSYSTEM
+---------
+
+'sched'::
+       Scheduler and IPC mechanisms.
+
+SUITES FOR 'sched'
+~~~~~~~~~~~~~~~~~~
+*messaging*::
+Suite for evaluating performance of scheduler and IPC mechanisms.
+Based on hackbench by Rusty Russell.
+
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+-p::
+--pipe::
+Use pipe() instead of socketpair()
+
+-t::
+--thread::
+Use multiple threads instead of multiple processes
+
+-g::
+--group=::
+Specify number of groups
+
+-l::
+--loop=::
+Specify number of loops
+
+Example of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched messaging                 # run with default options
+(20 sender and receiver processes per group)
+(10 groups == 400 processes run)
+
+      Total time:0.308 sec
+
+% perf bench sched messaging -t -g 20        # multi-threaded, with 20 groups
+(20 sender and receiver threads per group)
+(20 groups == 800 threads run)
+
+      Total time:0.582 sec
+---------------------
+
+*pipe*::
+Suite for pipe() system call.
+Based on pipe-test-1m.c by Ingo Molnar.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-l::
+--loop=::
+Specify number of loops.
+
+Example of *pipe*
+^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched pipe
+(executing 1000000 pipe operations between two tasks)
+
+        Total time:8.091 sec
+                8.091833 usecs/op
+                123581 ops/sec
+
+% perf bench sched pipe -l 1000              # loop 1000
+(executing 1000 pipe operations between two tasks)
+
+        Total time:0.016 sec
+                16.948000 usecs/op
+                59004 ops/sec
+---------------------
+
+SEE ALSO
+--------
+linkperf:perf[1]
index e6d4272..f7cd896 100644 (file)
@@ -421,6 +421,13 @@ LIB_OBJS += util/hist.o
 LIB_OBJS += util/data_map.o
 
 BUILTIN_OBJS += builtin-annotate.o
+
+BUILTIN_OBJS += builtin-bench.o
+
+# Benchmark modules
+BUILTIN_OBJS += bench/sched-messaging.o
+BUILTIN_OBJS += bench/sched-pipe.o
+
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
 BUILTIN_OBJS += builtin-list.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
new file mode 100644 (file)
index 0000000..9fbd8d7
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef BENCH_H
+#define BENCH_H
+
+/*
+ * Entry points of the individual benchmark suites.  Each takes the
+ * (argc, argv, prefix) triple handed down by cmd_bench() and returns an
+ * int status.
+ */
+extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+
+/*
+ * Output formats: the *_STR form is the spelling accepted on the command
+ * line, the plain form is the value stored in bench_format.
+ */
+#define BENCH_FORMAT_DEFAULT_STR       "default"
+#define BENCH_FORMAT_DEFAULT           0
+#define BENCH_FORMAT_SIMPLE_STR                "simple"
+#define BENCH_FORMAT_SIMPLE            1
+
+/*
+ * Marker for an unrecognised format string.  Parenthesised so the negative
+ * literal stays a single operand in whatever expression it expands into.
+ */
+#define BENCH_FORMAT_UNKNOWN           (-1)
+
+/* Currently selected output format; defined in builtin-bench.c */
+extern int bench_format;
+
+#endif
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
new file mode 100644 (file)
index 0000000..605a2a9
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ *
+ * builtin-bench-messaging.c
+ *
+ * messaging: Benchmark for scheduler and IPC mechanisms
+ *
+ * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+/* Test groups of 20 processes spraying to 20 receivers */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/poll.h>
+#include <limits.h>
+
+/* Size in bytes of one message written by sender() and read by receiver() */
+#define DATASIZE 100
+
+/*
+ * Run-time knobs, each bound to a command-line option in options[] below.
+ */
+/* non-zero: fdpair() creates a pipe() instead of a socketpair() */
+static int use_pipes = 0;
+/* number of messages every sender writes to each of its fds */
+static unsigned int loops = 100;
+/* non-zero: workers are pthreads; zero: workers are fork()ed processes */
+static unsigned int thread_mode = 0;
+/* number of sender/receiver groups that get started */
+static unsigned int num_groups = 10;
+
+/*
+ * Per-group state shared by all senders of one group.
+ * Allocated with room for num_fds trailing descriptors, see group().
+ */
+struct sender_context {
+       /* number of valid entries in out_fds[] */
+       unsigned int num_fds;
+       /* fd on which "I am ready" is reported, passed to ready() */
+       int ready_out;
+       /* fd polled for the start signal, passed to ready() */
+       int wakefd;
+       /* write ends, one per receiver; C99 flexible array member */
+       int out_fds[];
+};
+
+/* Per-receiver state, one instance allocated per receiver in group() */
+struct receiver_context {
+       /* number of DATASIZE-byte messages the receiver reads before returning */
+       unsigned int num_packets;
+       /* the fd pair: [0] is read from; [1] is closed by the receiver in process mode */
+       int in_fds[2];
+       /* fd on which "I am ready" is reported, passed to ready() */
+       int ready_out;
+       /* fd polled for the start signal, passed to ready() */
+       int wakefd;
+};
+
+/*
+ * Fatal-error helper: prints msg together with the text for the current
+ * errno on stderr and terminates the process with status 1.
+ */
+static void barf(const char *msg)
+{
+       const char *reason = strerror(errno);
+
+       fprintf(stderr, "%s (error: %s)\n", msg, reason);
+       exit(1);
+}
+
+/*
+ * Fill fds[] with a connected descriptor pair: a pipe() when use_pipes is
+ * set, an AF_UNIX stream socketpair() otherwise.  Does not return on
+ * failure (barf() exits).
+ */
+static void fdpair(int fds[2])
+{
+       int rc;
+
+       if (use_pipes)
+               rc = pipe(fds);
+       else
+               rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
+
+       if (rc != 0)
+               barf(use_pipes ? "pipe()" : "socketpair()");
+}
+
+/*
+ * Block until we're ready to go.
+ *
+ * Writes one byte to ready_out to announce this worker, then sleeps in
+ * poll() until wakefd becomes readable.  The wake byte is only polled for,
+ * never read here.  Exits via barf() if the write or the poll fails.
+ */
+static void ready(int ready_out, int wakefd)
+{
+       /*
+        * Only the arrival of the byte matters, but give it a defined value
+        * so we do not write indeterminate stack contents to the fd.
+        */
+       char dummy = 'N';
+       struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
+
+       /* Tell them we're ready. */
+       if (write(ready_out, &dummy, 1) != 1)
+               barf("CLIENT: ready write");
+
+       /* Wait for "GO" signal */
+       if (poll(&pollfd, 1, -1) != 1)
+               barf("poll");
+}
+
+/*
+ * Sender sprays loops messages down each file descriptor.
+ *
+ * ctx: shared per-group sender state (fds to write to, ready/wake fds).
+ * Always returns NULL; exits via barf() if a write fails.
+ */
+static void *sender(struct sender_context *ctx)
+{
+       char data[DATASIZE];
+       unsigned int i, j;
+
+       /* Give the payload defined contents instead of sending raw stack bytes */
+       memset(data, 'S', sizeof(data));
+
+       ready(ctx->ready_out, ctx->wakefd);
+
+       /* Now pump to every receiver. */
+       for (i = 0; i < loops; i++) {
+               for (j = 0; j < ctx->num_fds; j++) {
+                       int ret, done = 0;
+
+                       /* write() may be partial: continue until the whole message is out */
+                       while (done < DATASIZE) {
+                               ret = write(ctx->out_fds[j], data + done,
+                                           sizeof(data) - done);
+                               if (ret < 0)
+                                       barf("SENDER: write");
+                               done += ret;
+                       }
+               }
+       }
+
+       return NULL;
+}
+
+
+/*
+ * One receiver per fd.
+ *
+ * Reads ctx->num_packets messages of DATASIZE bytes each from
+ * ctx->in_fds[0] and discards them.  Always returns NULL; exits via
+ * barf() if a read fails.
+ */
+static void *receiver(struct receiver_context* ctx)
+{
+       unsigned int pkt;
+
+       /* Process mode only: drop this process's copy of the write end */
+       if (!thread_mode)
+               close(ctx->in_fds[1]);
+
+       /* Wait for start... */
+       ready(ctx->ready_out, ctx->wakefd);
+
+       /* Receive them all */
+       for (pkt = 0; pkt < ctx->num_packets; pkt++) {
+               char buf[DATASIZE];
+               int got = 0;
+
+               /* read() may be partial: collect until one full message is in */
+               do {
+                       int n = read(ctx->in_fds[0], buf + got, DATASIZE - got);
+
+                       if (n < 0)
+                               barf("SERVER: read");
+                       got += n;
+               } while (got < DATASIZE);
+       }
+
+       return NULL;
+}
+
+static pthread_t create_worker(void *ctx, void *(*func)(void *))
+{
+       pthread_attr_t attr;
+       pthread_t childid;
+       int err;
+
+       if (!thread_mode) {
+               /* process mode */
+               /* Fork the receiver. */
+               switch (fork()) {
+               case -1:
+                       barf("fork()");
+                       break;
+               case 0:
+                       (*func) (ctx);
+                       exit(0);
+                       break;
+               default:
+                       break;
+               }
+
+               return (pthread_t)0;
+       }
+
+       if (pthread_attr_init(&attr) != 0)
+               barf("pthread_attr_init:");
+
+#ifndef __ia64__
+       if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
+               barf("pthread_attr_setstacksize");
+#endif
+
+       err = pthread_create(&childid, &attr, func, ctx);
+       if (err != 0) {
+               fprintf(stderr, "pthread_create failed: %s (%d)\n",
+                       strerror(err), err);
+               exit(-1);
+       }
+       return childid;
+}
+
+/*
+ * Wait for one worker to finish.
+ *
+ * Thread mode: joins the thread identified by id.
+ * Process mode: id is not used; wait() collects whichever child finishes
+ * next, and the whole program exits with status 1 if that child did not
+ * terminate normally.
+ */
+static void reap_worker(pthread_t id)
+{
+       void *thread_status;
+       int proc_status;
+
+       if (thread_mode) {
+               pthread_join(id, &thread_status);
+               return;
+       }
+
+       /* process mode */
+       wait(&proc_status);
+       if (!WIFEXITED(proc_status))
+               exit(1);
+}
+
+/*
+ * One group of senders and receivers.
+ *
+ * Creates num_fds receivers, each with its own fd pair, followed by
+ * num_fds senders that all share one sender_context holding the write end
+ * of every pair.
+ *
+ * pth:       table receiving the worker ids; entries [0, num_fds) are the
+ *            receivers, [num_fds, 2 * num_fds) the senders
+ * num_fds:   number of receivers (and of senders) in this group
+ * ready_out: fd every worker reports readiness on
+ * wakefd:    fd every worker polls for the start signal
+ *
+ * Returns the number of workers started (num_fds * 2).
+ */
+static unsigned int group(pthread_t *pth,
+               unsigned int num_fds,
+               int ready_out,
+               int wakefd)
+{
+       unsigned int i;
+       struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
+                       + num_fds * sizeof(int));
+
+       if (!snd_ctx)
+               barf("malloc()");
+
+       /*
+        * These never change, so fill them in once, before any sender is
+        * started: in thread mode all senders read this same object.
+        */
+       snd_ctx->ready_out = ready_out;
+       snd_ctx->wakefd = wakefd;
+       snd_ctx->num_fds = num_fds;
+
+       for (i = 0; i < num_fds; i++) {
+               int fds[2];
+               struct receiver_context *ctx = malloc(sizeof(*ctx));
+
+               if (!ctx)
+                       barf("malloc()");
+
+
+               /* Create the pipe between client and server */
+               fdpair(fds);
+
+               /* every sender of the group sends 'loops' messages to this receiver */
+               ctx->num_packets = num_fds * loops;
+               ctx->in_fds[0] = fds[0];
+               ctx->in_fds[1] = fds[1];
+               ctx->ready_out = ready_out;
+               ctx->wakefd = wakefd;
+
+               pth[i] = create_worker(ctx, (void *)receiver);
+
+               snd_ctx->out_fds[i] = fds[1];
+               /* process mode: the read end now lives in the forked receiver */
+               if (!thread_mode)
+                       close(fds[0]);
+       }
+
+       /* Now we have all the fds, fork the senders */
+       for (i = 0; i < num_fds; i++)
+               pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
+
+       /* Close the fds we have left */
+       if (!thread_mode)
+               for (i = 0; i < num_fds; i++)
+                       close(snd_ctx->out_fds[i]);
+
+       /* Return number of children to reap */
+       return num_fds * 2;
+}
+
+/*
+ * Command-line options of 'perf bench sched messaging'; every entry binds
+ * a short/long option name to one of the file-scope knobs.
+ * NOTE(review): use_pipes is an int while thread_mode, num_groups and loops
+ * are unsigned int -- confirm OPT_BOOLEAN/OPT_INTEGER store through a
+ * compatible pointer type.
+ */
+static const struct option options[] = {
+       OPT_BOOLEAN('p', "pipe", &use_pipes,
+                   "Use pipe() instead of socketpair()"),
+       OPT_BOOLEAN('t', "thread", &thread_mode,
+                   "Be multi thread instead of multi process"),
+       OPT_INTEGER('g', "group", &num_groups,
+                   "Specify number of groups"),
+       OPT_INTEGER('l', "loop", &loops,
+                   "Specify number of loops"),
+       OPT_END()
+};
+
+/* NULL-terminated usage text handed to parse_options() */
+static const char * const bench_sched_message_usage[] = {
+       "perf bench sched messaging <options>",
+       NULL
+};
+
+/*
+ * Entry point of the 'messaging' suite.
+ *
+ * Starts num_groups groups of 20 senders and 20 receivers, waits until
+ * every worker has reported in, releases them all at once and measures the
+ * wall-clock time until the last worker has been reaped.  The result is
+ * printed according to bench_format.
+ *
+ * argc/argv: suite arguments, parsed against options[]
+ * prefix:    unused
+ *
+ * Returns 0; exits the process on any failure or unknown output format.
+ */
+int bench_sched_messaging(int argc, const char **argv,
+                   const char *prefix __used)
+{
+       unsigned int i, total_children;
+       struct timeval start, stop, diff;
+       unsigned int num_fds = 20;
+       int readyfds[2], wakefds[2];
+       /* defined value so the wake-up write never sends an indeterminate byte */
+       char dummy = 'N';
+       pthread_t *pth_tab;
+
+       argc = parse_options(argc, argv, options,
+                            bench_sched_message_usage, 0);
+
+       /* one slot per worker: num_fds receivers + num_fds senders per group */
+       pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
+       if (!pth_tab)
+               barf("main:malloc()");
+
+       fdpair(readyfds);
+       fdpair(wakefds);
+
+       total_children = 0;
+       for (i = 0; i < num_groups; i++)
+               total_children += group(pth_tab+total_children, num_fds,
+                                       readyfds[1], wakefds[0]);
+
+       /* Wait for everyone to be ready */
+       for (i = 0; i < total_children; i++)
+               if (read(readyfds[0], &dummy, 1) != 1)
+                       barf("Reading for readyfds");
+
+       gettimeofday(&start, NULL);
+
+       /* Kick them off */
+       if (write(wakefds[1], &dummy, 1) != 1)
+               barf("Writing to start them");
+
+       /* Reap them all */
+       for (i = 0; i < total_children; i++)
+               reap_worker(pth_tab[i]);
+
+       gettimeofday(&stop, NULL);
+
+       timersub(&stop, &start, &diff);
+
+       /* all workers are reaped, the id table is no longer needed */
+       free(pth_tab);
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               printf("# %u sender and receiver %s per group\n",
+                      num_fds, thread_mode ? "threads" : "processes");
+               printf("# %u groups == %u %s run\n\n",
+                      num_groups, num_groups * 2 * num_fds,
+                      thread_mode ? "threads" : "processes");
+               printf(" %14s: %lu.%03lu [sec]\n", "Total time",
+                      diff.tv_sec, diff.tv_usec/1000);
+               break;
+       case BENCH_FORMAT_SIMPLE:
+               printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
+               break;
+       default:
+               /* should be unreachable: the format is validated before a suite runs */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+               break;
+       }
+
+       return 0;
+}
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
new file mode 100644 (file)
index 0000000..238185f
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ *
+ * builtin-bench-pipe.c
+ *
+ * pipe: Benchmark for pipe()
+ *
+ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
+ *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../builtin.h"
+#include "bench.h"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+/* Number of loop iterations each of the two tasks performs unless -l/--loop is given */
+#define LOOPS_DEFAULT 1000000
+static int loops = LOOPS_DEFAULT;
+
+/* Command-line options of 'perf bench sched pipe' */
+static const struct option options[] = {
+       OPT_INTEGER('l', "loop", &loops,
+                   "Specify number of loops"),
+       OPT_END()
+};
+
+/* NULL-terminated usage text handed to parse_options() */
+static const char * const bench_sched_pipe_usage[] = {
+       "perf bench sched pipe <options>",
+       NULL
+};
+
+/*
+ * Entry point of the 'pipe' suite.
+ *
+ * Forks one child and bounces an int back and forth between parent and
+ * child over two pipes, 'loops' times.  The parent measures the wall-clock
+ * time of its side of the exchange, reaps the child and prints the result
+ * according to bench_format.  The child exits as soon as its loop is done
+ * and never returns to the caller.
+ *
+ * argc/argv: suite arguments, parsed against options[]
+ * prefix:    unused
+ *
+ * Returns 0; exits the process if pipe() or fork() fails or the output
+ * format is unknown.
+ */
+int bench_sched_pipe(int argc, const char **argv,
+                    const char *prefix __used)
+{
+       int pipe_1[2], pipe_2[2];
+       int m = 0, i;
+       struct timeval start, stop, diff;
+       unsigned long long result_usec = 0;
+
+       /*
+        * why does "ret" exist?
+        * discarding returned value of read(), write()
+        * causes error in building environment for perf
+        */
+       int ret, wait_stat;
+       pid_t pid, retpid;
+
+       argc = parse_options(argc, argv, options,
+                            bench_sched_pipe_usage, 0);
+
+       /* Keep the calls outside assert() so they survive an NDEBUG build */
+       if (pipe(pipe_1) != 0 || pipe(pipe_2) != 0) {
+               perror("pipe");
+               exit(1);
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               perror("fork");
+               exit(1);
+       }
+
+       if (!pid) {
+               /* child: echo every value back, then leave without returning */
+               for (i = 0; i < loops; i++) {
+                       ret = read(pipe_1[0], &m, sizeof(int));
+                       ret = write(pipe_2[1], &m, sizeof(int));
+               }
+               exit(0);
+       }
+
+       /* parent: drive the exchange and time it */
+       gettimeofday(&start, NULL);
+
+       for (i = 0; i < loops; i++) {
+               ret = write(pipe_1[1], &m, sizeof(int));
+               ret = read(pipe_2[0], &m, sizeof(int));
+       }
+
+       gettimeofday(&stop, NULL);
+       timersub(&stop, &start, &diff);
+
+       retpid = waitpid(pid, &wait_stat, 0);
+       assert((retpid == pid) && WIFEXITED(wait_stat));
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               printf("# Executed %d pipe operations between two tasks\n\n",
+                       loops);
+
+               /* widen before multiplying so a 32-bit tv_sec cannot overflow */
+               result_usec = (unsigned long long)diff.tv_sec * 1000000;
+               result_usec += diff.tv_usec;
+
+               printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+                      diff.tv_sec, diff.tv_usec/1000);
+
+               printf(" %14lf usecs/op\n",
+                      (double)result_usec / (double)loops);
+               printf(" %14d ops/sec\n",
+                      (int)((double)loops /
+                            ((double)result_usec / (double)1000000)));
+               break;
+
+       case BENCH_FORMAT_SIMPLE:
+               printf("%lu.%03lu\n",
+                      diff.tv_sec, diff.tv_usec / 1000);
+               break;
+
+       default:
+               /* should be unreachable: the format is validated before a suite runs */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+               break;
+       }
+
+       return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
new file mode 100644 (file)
index 0000000..90c39ba
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ *
+ * builtin-bench.c
+ *
+ * General benchmarking subsystem provided by perf
+ *
+ * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+/*
+ *
+ * Available subsystem list:
+ *  sched ... scheduler and IPC mechanism
+ *
+ */
+
+#include "perf.h"
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "builtin.h"
+#include "bench/bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* One runnable benchmark inside a subsystem */
+struct bench_suite {
+       /* suite name as typed on the command line; NULL marks the end of a table */
+       const char *name;
+       /* one-line description shown by dump_suites() */
+       const char *summary;
+       /* entry point, called by cmd_bench() as fn(argc, argv, prefix) */
+       int (*fn)(int, const char **, const char *);
+};
+
+/* Suites of the "sched" subsystem; the all-NULL entry terminates the table */
+static struct bench_suite sched_suites[] = {
+       { "messaging",
+         "Benchmark for scheduler and IPC mechanisms",
+         bench_sched_messaging },
+       { "pipe",
+         "Flood of communication over pipe() between two processes",
+         bench_sched_pipe      },
+       { NULL,
+         NULL,
+         NULL                  }
+};
+
+/* A named collection of benchmark suites */
+struct bench_subsys {
+       /* subsystem name as typed on the command line; NULL marks the end of a table */
+       const char *name;
+       /* one-line description shown by print_usage() */
+       const char *summary;
+       /* NULL-name-terminated array of the suites belonging to this subsystem */
+       struct bench_suite *suites;
+};
+
+/* All subsystems known to 'perf bench'; the all-NULL entry terminates the table */
+static struct bench_subsys subsystems[] = {
+       { "sched",
+         "scheduler and IPC mechanism",
+         sched_suites },
+       { NULL,
+         NULL,
+         NULL         }
+};
+
+/*
+ * Print the name and summary of every suite of subsystems[subsys_index]
+ * to stdout.  subsys_index is not range-checked.
+ */
+static void dump_suites(int subsys_index)
+{
+       const struct bench_subsys *subsys = &subsystems[subsys_index];
+       const struct bench_suite *suite;
+
+       printf("List of available suites for %s...\n\n", subsys->name);
+
+       /* the suite table ends at the first entry without a name */
+       for (suite = subsys->suites; suite->name; suite++)
+               printf("\t%s: %s\n", suite->name, suite->summary);
+
+       printf("\n");
+}
+
+/* Raw argument of -f/--format; stays NULL when the option is not given */
+static char *bench_format_str;
+/* Output format in effect, one of BENCH_FORMAT_*; read by the suites */
+int bench_format = BENCH_FORMAT_DEFAULT;
+
+/* Options common to all suites, parsed before the subsystem name */
+static const struct option bench_options[] = {
+       OPT_STRING('f', "format", &bench_format_str, "default",
+                   "Specify format style"),
+       OPT_END()
+};
+
+/* NULL-terminated usage text, used by parse_options() and print_usage() */
+static const char * const bench_usage[] = {
+       "perf bench [<common options>] <subsystem> <suite> [<options>]",
+       NULL
+};
+
+/*
+ * Print the usage lines from bench_usage[] followed by the name and
+ * summary of every known subsystem to stdout.
+ */
+static void print_usage(void)
+{
+       const char * const *line;
+       const struct bench_subsys *subsys;
+
+       printf("Usage: \n");
+       for (line = bench_usage; *line; line++)
+               printf("\t%s\n", *line);
+       printf("\n");
+
+       printf("List of available subsystems...\n\n");
+
+       /* the subsystem table ends at the first entry without a name */
+       for (subsys = subsystems; subsys->name; subsys++)
+               printf("\t%s: %s\n", subsys->name, subsys->summary);
+       printf("\n");
+}
+
+/*
+ * Map a format name to its BENCH_FORMAT_* value.
+ *
+ * A NULL str (option not given) selects BENCH_FORMAT_DEFAULT; a string that
+ * matches neither known name yields BENCH_FORMAT_UNKNOWN.
+ */
+static int bench_str2int(char *str)
+{
+       if (str == NULL || strcmp(str, BENCH_FORMAT_DEFAULT_STR) == 0)
+               return BENCH_FORMAT_DEFAULT;
+
+       if (strcmp(str, BENCH_FORMAT_SIMPLE_STR) == 0)
+               return BENCH_FORMAT_SIMPLE;
+
+       return BENCH_FORMAT_UNKNOWN;
+}
+
+/*
+ * Entry point of 'perf bench'.
+ *
+ * Parses the common options, then looks up argv[0] as a subsystem and
+ * argv[1] as a suite of that subsystem and runs the suite with the
+ * remaining arguments.
+ *
+ * Returns the status of the suite that ran, 0 when only usage or a suite
+ * list was printed, and 1 for an unknown format, subsystem or suite.
+ */
+int cmd_bench(int argc, const char **argv, const char *prefix __used)
+{
+       int i, j, status = 0;
+
+       if (argc < 2) {
+               /* No subsystem specified. */
+               print_usage();
+               goto end;
+       }
+
+       /* stop at the subsystem name so suite options are left for the suite */
+       argc = parse_options(argc, argv, bench_options, bench_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
+
+       bench_format = bench_str2int(bench_format_str);
+       if (bench_format == BENCH_FORMAT_UNKNOWN) {
+               printf("Unknown format descriptor:%s\n", bench_format_str);
+               /* a bad option is an error, like an unknown subsystem or suite */
+               status = 1;
+               goto end;
+       }
+
+       if (argc < 1) {
+               print_usage();
+               goto end;
+       }
+
+       for (i = 0; subsystems[i].name; i++) {
+               if (strcmp(subsystems[i].name, argv[0]))
+                       continue;
+
+               if (argc < 2) {
+                       /* No suite specified. */
+                       dump_suites(i);
+                       goto end;
+               }
+
+               for (j = 0; subsystems[i].suites[j].name; j++) {
+                       if (strcmp(subsystems[i].suites[j].name, argv[1]))
+                               continue;
+
+                       if (bench_format == BENCH_FORMAT_DEFAULT)
+                               printf("# Running %s/%s benchmark...\n",
+                                      subsystems[i].name,
+                                      subsystems[i].suites[j].name);
+                       /* the suite sees its own name as argv[0] */
+                       status = subsystems[i].suites[j].fn(argc - 1,
+                                                           argv + 1, prefix);
+                       goto end;
+               }
+
+               /* not a suite name: accept a help request in its place */
+               if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+                       dump_suites(i);
+                       goto end;
+               }
+
+               printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
+               status = 1;
+               goto end;
+       }
+
+       printf("Unknown subsystem:%s\n", argv[0]);
+       status = 1;
+
+end:
+       return status;
+}
index e11d8d2..f0cd5b1 100644 (file)
@@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
+extern int cmd_bench(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_sched(int argc, const char **argv, const char *prefix);
 extern int cmd_list(int argc, const char **argv, const char *prefix);
index 00326e2..981c40b 100644 (file)
@@ -3,6 +3,7 @@
 # command name                 category [deprecated] [common]
 #
 perf-annotate                  mainporcelain common
+perf-bench                     mainporcelain common
 perf-list                      mainporcelain common
 perf-sched                     mainporcelain common
 perf-record                    mainporcelain common
index fdd42a8..f000c30 100644 (file)
@@ -137,6 +137,8 @@ enum sw_event_ids {
        PERF_COUNT_SW_CPU_MIGRATIONS    = 4,
        PERF_COUNT_SW_PAGE_FAULTS_MIN   = 5,
        PERF_COUNT_SW_PAGE_FAULTS_MAJ   = 6,
+       PERF_COUNT_SW_ALIGNMENT_FAULTS  = 7,
+       PERF_COUNT_SW_EMULATION_FAULTS  = 8,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
index 601f403..8936786 100644 (file)
@@ -289,6 +289,7 @@ static void handle_internal_command(int argc, const char **argv)
                { "list", cmd_list, 0 },
                { "record", cmd_record, 0 },
                { "report", cmd_report, 0 },
+               { "bench", cmd_bench, 0 },
                { "stat", cmd_stat, 0 },
                { "timechart", cmd_timechart, 0 },
                { "top", cmd_top, 0 },
index 097938a..0faf4f2 100644 (file)
@@ -48,6 +48,8 @@ static struct event_symbol event_symbols[] = {
   { CSW(PAGE_FAULTS_MAJ),      "major-faults",         ""              },
   { CSW(CONTEXT_SWITCHES),     "context-switches",     "cs"            },
   { CSW(CPU_MIGRATIONS),       "cpu-migrations",       "migrations"    },
+  { CSW(ALIGNMENT_FAULTS),     "alignment-faults",     ""              },
+  { CSW(EMULATION_FAULTS),     "emulation-faults",     ""              },
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -76,6 +78,8 @@ static const char *sw_event_names[] = {
        "CPU-migrations",
        "minor-faults",
        "major-faults",
+       "alignment-faults",
+       "emulation-faults",
 };
 
 #define MAX_ALIASES 8