Merge commit 'linus/master' into tracing/kprobes
[safe/jmp/linux-2.6] / kernel / trace / trace_kprobe.c
1 /*
2  * kprobe based kernel tracer
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31 #include <linux/perf_event.h>
32
33 #include "trace.h"
34 #include "trace_output.h"
35
/* Limits applied while parsing probe definitions, and the default group. */
#define MAX_TRACE_ARGS          128             /* cap on fetch args parsed per probe */
#define MAX_ARGSTR_LEN          63              /* longest accepted fetch-arg string */
#define MAX_EVENT_NAME_LEN      64              /* buffer size for generated event names */
#define KPROBE_EVENT_SYSTEM     "kprobes"       /* group used when none is given */
40
41 /* currently, trace_kprobe only supports X86. */
42
/*
 * One fetch operation: a callback plus an opaque cookie that is handed back
 * to the callback together with the probed registers (see call_fetch()).
 */
struct fetch_func {
        unsigned long (*func)(struct pt_regs *regs, void *data);
        void *data;     /* passed as the second argument of func */
};
47
48 static __kprobes unsigned long call_fetch(struct fetch_func *f,
49                                           struct pt_regs *regs)
50 {
51         return f->func(regs, f->data);
52 }
53
54 /* fetch handlers */
55 static __kprobes unsigned long fetch_register(struct pt_regs *regs,
56                                               void *offset)
57 {
58         return regs_get_register(regs, (unsigned int)((unsigned long)offset));
59 }
60
61 static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
62                                            void *num)
63 {
64         return regs_get_kernel_stack_nth(regs,
65                                          (unsigned int)((unsigned long)num));
66 }
67
68 static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
69 {
70         unsigned long retval;
71
72         if (probe_kernel_address(addr, retval))
73                 return 0;
74         return retval;
75 }
76
77 static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
78 {
79         return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
80 }
81
82 static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
83                                               void *dummy)
84 {
85         return regs_return_value(regs);
86 }
87
88 static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
89 {
90         return instruction_pointer(regs);
91 }
92
93 static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
94                                                    void *dummy)
95 {
96         return kernel_stack_pointer(regs);
97 }
98
/*
 * Memory fetching by symbol: the symbol name and offset given by the user,
 * plus the address they resolved to (0 when the lookup failed).
 */
struct symbol_cache {
        char            *symbol;        /* kstrdup()ed symbol name */
        long            offset;         /* added to the symbol address */
        unsigned long   addr;           /* filled in by update_symbol_cache() */
};
105
106 static unsigned long update_symbol_cache(struct symbol_cache *sc)
107 {
108         sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
109         if (sc->addr)
110                 sc->addr += sc->offset;
111         return sc->addr;
112 }
113
114 static void free_symbol_cache(struct symbol_cache *sc)
115 {
116         kfree(sc->symbol);
117         kfree(sc);
118 }
119
120 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
121 {
122         struct symbol_cache *sc;
123
124         if (!sym || strlen(sym) == 0)
125                 return NULL;
126         sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
127         if (!sc)
128                 return NULL;
129
130         sc->symbol = kstrdup(sym, GFP_KERNEL);
131         if (!sc->symbol) {
132                 kfree(sc);
133                 return NULL;
134         }
135         sc->offset = offset;
136
137         update_symbol_cache(sc);
138         return sc;
139 }
140
141 static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
142 {
143         struct symbol_cache *sc = data;
144
145         if (sc->addr)
146                 return fetch_memory(regs, (void *)sc->addr);
147         else
148                 return 0;
149 }
150
151 /* Special indirect memory access interface */
152 struct indirect_fetch_data {
153         struct fetch_func orig;
154         long offset;
155 };
156
157 static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
158 {
159         struct indirect_fetch_data *ind = data;
160         unsigned long addr;
161
162         addr = call_fetch(&ind->orig, regs);
163         if (addr) {
164                 addr += ind->offset;
165                 return fetch_memory(regs, (void *)addr);
166         } else
167                 return 0;
168 }
169
170 static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
171 {
172         if (data->orig.func == fetch_indirect)
173                 free_indirect_fetch_data(data->orig.data);
174         else if (data->orig.func == fetch_symbol)
175                 free_symbol_cache(data->orig.data);
176         kfree(data);
177 }
178
179 /**
180  * Kprobe tracer core functions
181  */
182
183 struct probe_arg {
184         struct fetch_func       fetch;
185         const char              *name;
186 };
187
/* Flag bits stored in trace_probe.flags */
#define TP_FLAG_TRACE           (1 << 0)
#define TP_FLAG_PROFILE         (1 << 1)
191
192 struct trace_probe {
193         struct list_head        list;
194         struct kretprobe        rp;     /* Use rp.kp for kprobe use */
195         unsigned long           nhit;
196         unsigned int            flags;  /* For TP_FLAG_* */
197         const char              *symbol;        /* symbol name */
198         struct ftrace_event_call        call;
199         struct trace_event              event;
200         unsigned int            nr_args;
201         struct probe_arg        args[];
202 };
203
/* Bytes needed for a struct trace_probe that carries n probe arguments. */
#define SIZEOF_TRACE_PROBE(n)                           \
        (offsetof(struct trace_probe, args) +           \
         ((n) * sizeof(struct probe_arg)))
207
208 static __kprobes int probe_is_return(struct trace_probe *tp)
209 {
210         return tp->rp.handler != NULL;
211 }
212
213 static __kprobes const char *probe_symbol(struct trace_probe *tp)
214 {
215         return tp->symbol ? tp->symbol : "unknown";
216 }
217
218 static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
219 {
220         int ret = -EINVAL;
221
222         if (ff->func == fetch_argument)
223                 ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
224         else if (ff->func == fetch_register) {
225                 const char *name;
226                 name = regs_query_register_name((unsigned int)((long)ff->data));
227                 ret = snprintf(buf, n, "%%%s", name);
228         } else if (ff->func == fetch_stack)
229                 ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
230         else if (ff->func == fetch_memory)
231                 ret = snprintf(buf, n, "@0x%p", ff->data);
232         else if (ff->func == fetch_symbol) {
233                 struct symbol_cache *sc = ff->data;
234                 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
235         } else if (ff->func == fetch_retvalue)
236                 ret = snprintf(buf, n, "rv");
237         else if (ff->func == fetch_ip)
238                 ret = snprintf(buf, n, "ra");
239         else if (ff->func == fetch_stack_address)
240                 ret = snprintf(buf, n, "sa");
241         else if (ff->func == fetch_indirect) {
242                 struct indirect_fetch_data *id = ff->data;
243                 size_t l = 0;
244                 ret = snprintf(buf, n, "%+ld(", id->offset);
245                 if (ret >= n)
246                         goto end;
247                 l += ret;
248                 ret = probe_arg_string(buf + l, n - l, &id->orig);
249                 if (ret < 0)
250                         goto end;
251                 l += ret;
252                 ret = snprintf(buf + l, n - l, ")");
253                 ret += l;
254         }
255 end:
256         if (ret >= n)
257                 return -ENOSPC;
258         return ret;
259 }
260
/* List of registered trace probes and the mutex taken around its use. */
static LIST_HEAD(probe_list);
static DEFINE_MUTEX(probe_lock);

/* Forward declarations: event (un)registration helpers. */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* Forward declarations: handlers installed by alloc_trace_probe(). */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
                                struct pt_regs *regs);
270
271 /*
272  * Allocate new trace_probe and initialize it (including kprobes).
273  */
274 static struct trace_probe *alloc_trace_probe(const char *group,
275                                              const char *event,
276                                              void *addr,
277                                              const char *symbol,
278                                              unsigned long offs,
279                                              int nargs, int is_return)
280 {
281         struct trace_probe *tp;
282
283         tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
284         if (!tp)
285                 return ERR_PTR(-ENOMEM);
286
287         if (symbol) {
288                 tp->symbol = kstrdup(symbol, GFP_KERNEL);
289                 if (!tp->symbol)
290                         goto error;
291                 tp->rp.kp.symbol_name = tp->symbol;
292                 tp->rp.kp.offset = offs;
293         } else
294                 tp->rp.kp.addr = addr;
295
296         if (is_return)
297                 tp->rp.handler = kretprobe_dispatcher;
298         else
299                 tp->rp.kp.pre_handler = kprobe_dispatcher;
300
301         if (!event)
302                 goto error;
303         tp->call.name = kstrdup(event, GFP_KERNEL);
304         if (!tp->call.name)
305                 goto error;
306
307         if (!group)
308                 goto error;
309         tp->call.system = kstrdup(group, GFP_KERNEL);
310         if (!tp->call.system)
311                 goto error;
312
313         INIT_LIST_HEAD(&tp->list);
314         return tp;
315 error:
316         kfree(tp->call.name);
317         kfree(tp->symbol);
318         kfree(tp);
319         return ERR_PTR(-ENOMEM);
320 }
321
322 static void free_probe_arg(struct probe_arg *arg)
323 {
324         if (arg->fetch.func == fetch_symbol)
325                 free_symbol_cache(arg->fetch.data);
326         else if (arg->fetch.func == fetch_indirect)
327                 free_indirect_fetch_data(arg->fetch.data);
328         kfree(arg->name);
329 }
330
331 static void free_trace_probe(struct trace_probe *tp)
332 {
333         int i;
334
335         for (i = 0; i < tp->nr_args; i++)
336                 free_probe_arg(&tp->args[i]);
337
338         kfree(tp->call.system);
339         kfree(tp->call.name);
340         kfree(tp->symbol);
341         kfree(tp);
342 }
343
344 static struct trace_probe *find_probe_event(const char *event)
345 {
346         struct trace_probe *tp;
347
348         list_for_each_entry(tp, &probe_list, list)
349                 if (!strcmp(tp->call.name, event))
350                         return tp;
351         return NULL;
352 }
353
354 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
355 static void unregister_trace_probe(struct trace_probe *tp)
356 {
357         if (probe_is_return(tp))
358                 unregister_kretprobe(&tp->rp);
359         else
360                 unregister_kprobe(&tp->rp.kp);
361         list_del(&tp->list);
362         unregister_probe_event(tp);
363 }
364
365 /* Register a trace_probe and probe_event */
366 static int register_trace_probe(struct trace_probe *tp)
367 {
368         struct trace_probe *old_tp;
369         int ret;
370
371         mutex_lock(&probe_lock);
372
373         /* register as an event */
374         old_tp = find_probe_event(tp->call.name);
375         if (old_tp) {
376                 /* delete old event */
377                 unregister_trace_probe(old_tp);
378                 free_trace_probe(old_tp);
379         }
380         ret = register_probe_event(tp);
381         if (ret) {
382                 pr_warning("Faild to register probe event(%d)\n", ret);
383                 goto end;
384         }
385
386         tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
387         if (probe_is_return(tp))
388                 ret = register_kretprobe(&tp->rp);
389         else
390                 ret = register_kprobe(&tp->rp.kp);
391
392         if (ret) {
393                 pr_warning("Could not insert probe(%d)\n", ret);
394                 if (ret == -EILSEQ) {
395                         pr_warning("Probing address(0x%p) is not an "
396                                    "instruction boundary.\n",
397                                    tp->rp.kp.addr);
398                         ret = -EINVAL;
399                 }
400                 unregister_probe_event(tp);
401         } else
402                 list_add_tail(&tp->list, &probe_list);
403 end:
404         mutex_unlock(&probe_lock);
405         return ret;
406 }
407
408 /* Split symbol and offset. */
409 static int split_symbol_offset(char *symbol, unsigned long *offset)
410 {
411         char *tmp;
412         int ret;
413
414         if (!offset)
415                 return -EINVAL;
416
417         tmp = strchr(symbol, '+');
418         if (tmp) {
419                 /* skip sign because strict_strtol doesn't accept '+' */
420                 ret = strict_strtoul(tmp + 1, 0, offset);
421                 if (ret)
422                         return ret;
423                 *tmp = '\0';
424         } else
425                 *offset = 0;
426         return 0;
427 }
428
/* Largest index accepted for the "aN" and "sN" fetch arguments. */
#define PARAM_MAX_ARGS          16
#define PARAM_MAX_STACK         (THREAD_SIZE / sizeof(unsigned long))
431
432 static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
433 {
434         int ret = 0;
435         unsigned long param;
436         long offset;
437         char *tmp;
438
439         switch (arg[0]) {
440         case 'a':       /* argument */
441                 ret = strict_strtoul(arg + 1, 10, &param);
442                 if (ret || param > PARAM_MAX_ARGS)
443                         ret = -EINVAL;
444                 else {
445                         ff->func = fetch_argument;
446                         ff->data = (void *)param;
447                 }
448                 break;
449         case 'r':       /* retval or retaddr */
450                 if (is_return && arg[1] == 'v') {
451                         ff->func = fetch_retvalue;
452                         ff->data = NULL;
453                 } else if (is_return && arg[1] == 'a') {
454                         ff->func = fetch_ip;
455                         ff->data = NULL;
456                 } else
457                         ret = -EINVAL;
458                 break;
459         case '%':       /* named register */
460                 ret = regs_query_register_offset(arg + 1);
461                 if (ret >= 0) {
462                         ff->func = fetch_register;
463                         ff->data = (void *)(unsigned long)ret;
464                         ret = 0;
465                 }
466                 break;
467         case 's':       /* stack */
468                 if (arg[1] == 'a') {
469                         ff->func = fetch_stack_address;
470                         ff->data = NULL;
471                 } else {
472                         ret = strict_strtoul(arg + 1, 10, &param);
473                         if (ret || param > PARAM_MAX_STACK)
474                                 ret = -EINVAL;
475                         else {
476                                 ff->func = fetch_stack;
477                                 ff->data = (void *)param;
478                         }
479                 }
480                 break;
481         case '@':       /* memory or symbol */
482                 if (isdigit(arg[1])) {
483                         ret = strict_strtoul(arg + 1, 0, &param);
484                         if (ret)
485                                 break;
486                         ff->func = fetch_memory;
487                         ff->data = (void *)param;
488                 } else {
489                         ret = split_symbol_offset(arg + 1, &offset);
490                         if (ret)
491                                 break;
492                         ff->data = alloc_symbol_cache(arg + 1,
493                                                               offset);
494                         if (ff->data)
495                                 ff->func = fetch_symbol;
496                         else
497                                 ret = -EINVAL;
498                 }
499                 break;
500         case '+':       /* indirect memory */
501         case '-':
502                 tmp = strchr(arg, '(');
503                 if (!tmp) {
504                         ret = -EINVAL;
505                         break;
506                 }
507                 *tmp = '\0';
508                 ret = strict_strtol(arg + 1, 0, &offset);
509                 if (ret)
510                         break;
511                 if (arg[0] == '-')
512                         offset = -offset;
513                 arg = tmp + 1;
514                 tmp = strrchr(arg, ')');
515                 if (tmp) {
516                         struct indirect_fetch_data *id;
517                         *tmp = '\0';
518                         id = kzalloc(sizeof(struct indirect_fetch_data),
519                                      GFP_KERNEL);
520                         if (!id)
521                                 return -ENOMEM;
522                         id->offset = offset;
523                         ret = parse_probe_arg(arg, &id->orig, is_return);
524                         if (ret)
525                                 kfree(id);
526                         else {
527                                 ff->func = fetch_indirect;
528                                 ff->data = (void *)id;
529                         }
530                 } else
531                         ret = -EINVAL;
532                 break;
533         default:
534                 /* TODO: support custom handler */
535                 ret = -EINVAL;
536         }
537         return ret;
538 }
539
540 static int create_trace_probe(int argc, char **argv)
541 {
542         /*
543          * Argument syntax:
544          *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
545          *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
546          * Fetch args:
547          *  aN  : fetch Nth of function argument. (N:0-)
548          *  rv  : fetch return value
549          *  ra  : fetch return address
550          *  sa  : fetch stack address
551          *  sN  : fetch Nth of stack (N:0-)
552          *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
553          *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
554          *  %REG        : fetch register REG
555          * Indirect memory fetch:
556          *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
557          * Alias name of args:
558          *  NAME=FETCHARG : set NAME as alias of FETCHARG.
559          */
560         struct trace_probe *tp;
561         int i, ret = 0;
562         int is_return = 0;
563         char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
564         unsigned long offset = 0;
565         void *addr = NULL;
566         char buf[MAX_EVENT_NAME_LEN];
567
568         if (argc < 2)
569                 return -EINVAL;
570
571         if (argv[0][0] == 'p')
572                 is_return = 0;
573         else if (argv[0][0] == 'r')
574                 is_return = 1;
575         else
576                 return -EINVAL;
577
578         if (argv[0][1] == ':') {
579                 event = &argv[0][2];
580                 if (strchr(event, '/')) {
581                         group = event;
582                         event = strchr(group, '/') + 1;
583                         event[-1] = '\0';
584                         if (strlen(group) == 0) {
585                                 pr_info("Group name is not specifiled\n");
586                                 return -EINVAL;
587                         }
588                 }
589                 if (strlen(event) == 0) {
590                         pr_info("Event name is not specifiled\n");
591                         return -EINVAL;
592                 }
593         }
594
595         if (isdigit(argv[1][0])) {
596                 if (is_return)
597                         return -EINVAL;
598                 /* an address specified */
599                 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
600                 if (ret)
601                         return ret;
602         } else {
603                 /* a symbol specified */
604                 symbol = argv[1];
605                 /* TODO: support .init module functions */
606                 ret = split_symbol_offset(symbol, &offset);
607                 if (ret)
608                         return ret;
609                 if (offset && is_return)
610                         return -EINVAL;
611         }
612         argc -= 2; argv += 2;
613
614         /* setup a probe */
615         if (!group)
616                 group = KPROBE_EVENT_SYSTEM;
617         if (!event) {
618                 /* Make a new event name */
619                 if (symbol)
620                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
621                                  is_return ? 'r' : 'p', symbol, offset);
622                 else
623                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
624                                  is_return ? 'r' : 'p', addr);
625                 event = buf;
626         }
627         tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
628                                is_return);
629         if (IS_ERR(tp))
630                 return PTR_ERR(tp);
631
632         /* parse arguments */
633         ret = 0;
634         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
635                 /* Parse argument name */
636                 arg = strchr(argv[i], '=');
637                 if (arg)
638                         *arg++ = '\0';
639                 else
640                         arg = argv[i];
641                 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
642
643                 /* Parse fetch argument */
644                 if (strlen(arg) > MAX_ARGSTR_LEN) {
645                         pr_info("Argument%d(%s) is too long.\n", i, arg);
646                         ret = -ENOSPC;
647                         goto error;
648                 }
649                 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
650                 if (ret)
651                         goto error;
652         }
653         tp->nr_args = i;
654
655         ret = register_trace_probe(tp);
656         if (ret)
657                 goto error;
658         return 0;
659
660 error:
661         free_trace_probe(tp);
662         return ret;
663 }
664
665 static void cleanup_all_probes(void)
666 {
667         struct trace_probe *tp;
668
669         mutex_lock(&probe_lock);
670         /* TODO: Use batch unregistration */
671         while (!list_empty(&probe_list)) {
672                 tp = list_entry(probe_list.next, struct trace_probe, list);
673                 unregister_trace_probe(tp);
674                 free_trace_probe(tp);
675         }
676         mutex_unlock(&probe_lock);
677 }
678
679
680 /* Probes listing interfaces */
681 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
682 {
683         mutex_lock(&probe_lock);
684         return seq_list_start(&probe_list, *pos);
685 }
686
687 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
688 {
689         return seq_list_next(v, &probe_list, pos);
690 }
691
692 static void probes_seq_stop(struct seq_file *m, void *v)
693 {
694         mutex_unlock(&probe_lock);
695 }
696
697 static int probes_seq_show(struct seq_file *m, void *v)
698 {
699         struct trace_probe *tp = v;
700         int i, ret;
701         char buf[MAX_ARGSTR_LEN + 1];
702
703         seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
704         seq_printf(m, ":%s", tp->call.name);
705
706         if (tp->symbol)
707                 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
708         else
709                 seq_printf(m, " 0x%p", tp->rp.kp.addr);
710
711         for (i = 0; i < tp->nr_args; i++) {
712                 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
713                 if (ret < 0) {
714                         pr_warning("Argument%d decoding error(%d).\n", i, ret);
715                         return ret;
716                 }
717                 seq_printf(m, " %s=%s", tp->args[i].name, buf);
718         }
719         seq_printf(m, "\n");
720         return 0;
721 }
722
723 static const struct seq_operations probes_seq_op = {
724         .start  = probes_seq_start,
725         .next   = probes_seq_next,
726         .stop   = probes_seq_stop,
727         .show   = probes_seq_show
728 };
729
730 static int probes_open(struct inode *inode, struct file *file)
731 {
732         if ((file->f_mode & FMODE_WRITE) &&
733             (file->f_flags & O_TRUNC))
734                 cleanup_all_probes();
735
736         return seq_open(file, &probes_seq_op);
737 }
738
739 static int command_trace_probe(const char *buf)
740 {
741         char **argv;
742         int argc = 0, ret = 0;
743
744         argv = argv_split(GFP_KERNEL, buf, &argc);
745         if (!argv)
746                 return -ENOMEM;
747
748         if (argc)
749                 ret = create_trace_probe(argc, argv);
750
751         argv_free(argv);
752         return ret;
753 }
754
755 #define WRITE_BUFSIZE 128
756
757 static ssize_t probes_write(struct file *file, const char __user *buffer,
758                             size_t count, loff_t *ppos)
759 {
760         char *kbuf, *tmp;
761         int ret;
762         size_t done;
763         size_t size;
764
765         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
766         if (!kbuf)
767                 return -ENOMEM;
768
769         ret = done = 0;
770         while (done < count) {
771                 size = count - done;
772                 if (size >= WRITE_BUFSIZE)
773                         size = WRITE_BUFSIZE - 1;
774                 if (copy_from_user(kbuf, buffer + done, size)) {
775                         ret = -EFAULT;
776                         goto out;
777                 }
778                 kbuf[size] = '\0';
779                 tmp = strchr(kbuf, '\n');
780                 if (tmp) {
781                         *tmp = '\0';
782                         size = tmp - kbuf + 1;
783                 } else if (done + size < count) {
784                         pr_warning("Line length is too long: "
785                                    "Should be less than %d.", WRITE_BUFSIZE);
786                         ret = -EINVAL;
787                         goto out;
788                 }
789                 done += size;
790                 /* Remove comments */
791                 tmp = strchr(kbuf, '#');
792                 if (tmp)
793                         *tmp = '\0';
794
795                 ret = command_trace_probe(kbuf);
796                 if (ret)
797                         goto out;
798         }
799         ret = done;
800 out:
801         kfree(kbuf);
802         return ret;
803 }
804
805 static const struct file_operations kprobe_events_ops = {
806         .owner          = THIS_MODULE,
807         .open           = probes_open,
808         .read           = seq_read,
809         .llseek         = seq_lseek,
810         .release        = seq_release,
811         .write          = probes_write,
812 };
813
814 /* Probes profiling interfaces */
815 static int probes_profile_seq_show(struct seq_file *m, void *v)
816 {
817         struct trace_probe *tp = v;
818
819         seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
820                    tp->rp.kp.nmissed);
821
822         return 0;
823 }
824
825 static const struct seq_operations profile_seq_op = {
826         .start  = probes_seq_start,
827         .next   = probes_seq_next,
828         .stop   = probes_seq_stop,
829         .show   = probes_profile_seq_show
830 };
831
832 static int profile_open(struct inode *inode, struct file *file)
833 {
834         return seq_open(file, &profile_seq_op);
835 }
836
837 static const struct file_operations kprobe_profile_ops = {
838         .owner          = THIS_MODULE,
839         .open           = profile_open,
840         .read           = seq_read,
841         .llseek         = seq_lseek,
842         .release        = seq_release,
843 };
844
845 /* Kprobe handler */
846 static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
847 {
848         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
849         struct kprobe_trace_entry *entry;
850         struct ring_buffer_event *event;
851         struct ring_buffer *buffer;
852         int size, i, pc;
853         unsigned long irq_flags;
854         struct ftrace_event_call *call = &tp->call;
855
856         tp->nhit++;
857
858         local_save_flags(irq_flags);
859         pc = preempt_count();
860
861         size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
862
863         event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
864                                                   irq_flags, pc);
865         if (!event)
866                 return 0;
867
868         entry = ring_buffer_event_data(event);
869         entry->nargs = tp->nr_args;
870         entry->ip = (unsigned long)kp->addr;
871         for (i = 0; i < tp->nr_args; i++)
872                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
873
874         if (!filter_current_check_discard(buffer, call, entry, event))
875                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
876         return 0;
877 }
878
879 /* Kretprobe handler */
880 static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
881                                           struct pt_regs *regs)
882 {
883         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
884         struct kretprobe_trace_entry *entry;
885         struct ring_buffer_event *event;
886         struct ring_buffer *buffer;
887         int size, i, pc;
888         unsigned long irq_flags;
889         struct ftrace_event_call *call = &tp->call;
890
891         local_save_flags(irq_flags);
892         pc = preempt_count();
893
894         size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
895
896         event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
897                                                   irq_flags, pc);
898         if (!event)
899                 return 0;
900
901         entry = ring_buffer_event_data(event);
902         entry->nargs = tp->nr_args;
903         entry->func = (unsigned long)tp->rp.kp.addr;
904         entry->ret_ip = (unsigned long)ri->ret_addr;
905         for (i = 0; i < tp->nr_args; i++)
906                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
907
908         if (!filter_current_check_discard(buffer, call, entry, event))
909                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
910
911         return 0;
912 }
913
914 /* Event entry printers */
915 enum print_line_t
916 print_kprobe_event(struct trace_iterator *iter, int flags)
917 {
918         struct kprobe_trace_entry *field;
919         struct trace_seq *s = &iter->seq;
920         struct trace_event *event;
921         struct trace_probe *tp;
922         int i;
923
924         field = (struct kprobe_trace_entry *)iter->ent;
925         event = ftrace_find_event(field->ent.type);
926         tp = container_of(event, struct trace_probe, event);
927
928         if (!trace_seq_printf(s, "%s: (", tp->call.name))
929                 goto partial;
930
931         if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
932                 goto partial;
933
934         if (!trace_seq_puts(s, ")"))
935                 goto partial;
936
937         for (i = 0; i < field->nargs; i++)
938                 if (!trace_seq_printf(s, " %s=%lx",
939                                       tp->args[i].name, field->args[i]))
940                         goto partial;
941
942         if (!trace_seq_puts(s, "\n"))
943                 goto partial;
944
945         return TRACE_TYPE_HANDLED;
946 partial:
947         return TRACE_TYPE_PARTIAL_LINE;
948 }
949
950 enum print_line_t
951 print_kretprobe_event(struct trace_iterator *iter, int flags)
952 {
953         struct kretprobe_trace_entry *field;
954         struct trace_seq *s = &iter->seq;
955         struct trace_event *event;
956         struct trace_probe *tp;
957         int i;
958
959         field = (struct kretprobe_trace_entry *)iter->ent;
960         event = ftrace_find_event(field->ent.type);
961         tp = container_of(event, struct trace_probe, event);
962
963         if (!trace_seq_printf(s, "%s: (", tp->call.name))
964                 goto partial;
965
966         if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
967                 goto partial;
968
969         if (!trace_seq_puts(s, " <- "))
970                 goto partial;
971
972         if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
973                 goto partial;
974
975         if (!trace_seq_puts(s, ")"))
976                 goto partial;
977
978         for (i = 0; i < field->nargs; i++)
979                 if (!trace_seq_printf(s, " %s=%lx",
980                                       tp->args[i].name, field->args[i]))
981                         goto partial;
982
983         if (!trace_seq_puts(s, "\n"))
984                 goto partial;
985
986         return TRACE_TYPE_HANDLED;
987 partial:
988         return TRACE_TYPE_PARTIAL_LINE;
989 }
990
991 static int probe_event_enable(struct ftrace_event_call *call)
992 {
993         struct trace_probe *tp = (struct trace_probe *)call->data;
994
995         tp->flags |= TP_FLAG_TRACE;
996         if (probe_is_return(tp))
997                 return enable_kretprobe(&tp->rp);
998         else
999                 return enable_kprobe(&tp->rp.kp);
1000 }
1001
1002 static void probe_event_disable(struct ftrace_event_call *call)
1003 {
1004         struct trace_probe *tp = (struct trace_probe *)call->data;
1005
1006         tp->flags &= ~TP_FLAG_TRACE;
1007         if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1008                 if (probe_is_return(tp))
1009                         disable_kretprobe(&tp->rp);
1010                 else
1011                         disable_kprobe(&tp->rp.kp);
1012         }
1013 }
1014
1015 static int probe_event_raw_init(struct ftrace_event_call *event_call)
1016 {
1017         INIT_LIST_HEAD(&event_call->fields);
1018
1019         return 0;
1020 }
1021
1022 #undef DEFINE_FIELD
1023 #define DEFINE_FIELD(type, item, name, is_signed)                       \
1024         do {                                                            \
1025                 ret = trace_define_field(event_call, #type, name,       \
1026                                          offsetof(typeof(field), item), \
1027                                          sizeof(field.item), is_signed, \
1028                                          FILTER_OTHER);                 \
1029                 if (ret)                                                \
1030                         return ret;                                     \
1031         } while (0)
1032
1033 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1034 {
1035         int ret, i;
1036         struct kprobe_trace_entry field;
1037         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1038
1039         ret = trace_define_common_fields(event_call);
1040         if (!ret)
1041                 return ret;
1042
1043         DEFINE_FIELD(unsigned long, ip, "ip", 0);
1044         DEFINE_FIELD(int, nargs, "nargs", 1);
1045         /* Set argument names as fields */
1046         for (i = 0; i < tp->nr_args; i++)
1047                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1048         return 0;
1049 }
1050
1051 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1052 {
1053         int ret, i;
1054         struct kretprobe_trace_entry field;
1055         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1056
1057         ret = trace_define_common_fields(event_call);
1058         if (!ret)
1059                 return ret;
1060
1061         DEFINE_FIELD(unsigned long, func, "func", 0);
1062         DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
1063         DEFINE_FIELD(int, nargs, "nargs", 1);
1064         /* Set argument names as fields */
1065         for (i = 0; i < tp->nr_args; i++)
1066                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1067         return 0;
1068 }
1069
1070 static int __probe_event_show_format(struct trace_seq *s,
1071                                      struct trace_probe *tp, const char *fmt,
1072                                      const char *arg)
1073 {
1074         int i;
1075
1076         /* Show format */
1077         if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1078                 return 0;
1079
1080         for (i = 0; i < tp->nr_args; i++)
1081                 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1082                         return 0;
1083
1084         if (!trace_seq_printf(s, "\", %s", arg))
1085                 return 0;
1086
1087         for (i = 0; i < tp->nr_args; i++)
1088                 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1089                         return 0;
1090
1091         return trace_seq_puts(s, "\n");
1092 }
1093
1094 #undef SHOW_FIELD
1095 #define SHOW_FIELD(type, item, name)                                    \
1096         do {                                                            \
1097                 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"    \
1098                                 "offset:%u;\tsize:%u;\n", name,         \
1099                                 (unsigned int)offsetof(typeof(field), item),\
1100                                 (unsigned int)sizeof(type));            \
1101                 if (!ret)                                               \
1102                         return 0;                                       \
1103         } while (0)
1104
1105 static int kprobe_event_show_format(struct ftrace_event_call *call,
1106                                     struct trace_seq *s)
1107 {
1108         struct kprobe_trace_entry field __attribute__((unused));
1109         int ret, i;
1110         struct trace_probe *tp = (struct trace_probe *)call->data;
1111
1112         SHOW_FIELD(unsigned long, ip, "ip");
1113         SHOW_FIELD(int, nargs, "nargs");
1114
1115         /* Show fields */
1116         for (i = 0; i < tp->nr_args; i++)
1117                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1118         trace_seq_puts(s, "\n");
1119
1120         return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
1121 }
1122
1123 static int kretprobe_event_show_format(struct ftrace_event_call *call,
1124                                        struct trace_seq *s)
1125 {
1126         struct kretprobe_trace_entry field __attribute__((unused));
1127         int ret, i;
1128         struct trace_probe *tp = (struct trace_probe *)call->data;
1129
1130         SHOW_FIELD(unsigned long, func, "func");
1131         SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
1132         SHOW_FIELD(int, nargs, "nargs");
1133
1134         /* Show fields */
1135         for (i = 0; i < tp->nr_args; i++)
1136                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1137         trace_seq_puts(s, "\n");
1138
1139         return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1140                                           "REC->func, REC->ret_ip");
1141 }
1142
1143 #ifdef CONFIG_EVENT_PROFILE
1144
1145 /* Kprobe profile handler */
1146 static __kprobes int kprobe_profile_func(struct kprobe *kp,
1147                                          struct pt_regs *regs)
1148 {
1149         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1150         struct ftrace_event_call *call = &tp->call;
1151         struct kprobe_trace_entry *entry;
1152         int size, __size, i, pc;
1153         unsigned long irq_flags;
1154
1155         local_save_flags(irq_flags);
1156         pc = preempt_count();
1157
1158         __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1159         size = ALIGN(__size + sizeof(u32), sizeof(u64));
1160         size -= sizeof(u32);
1161
1162         do {
1163                 char raw_data[size];
1164                 struct trace_entry *ent;
1165                 /*
1166                  * Zero dead bytes from alignment to avoid stack leak
1167                  * to userspace
1168                  */
1169                 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1170                 entry = (struct kprobe_trace_entry *)raw_data;
1171                 ent = &entry->ent;
1172
1173                 tracing_generic_entry_update(ent, irq_flags, pc);
1174                 ent->type = call->id;
1175                 entry->nargs = tp->nr_args;
1176                 entry->ip = (unsigned long)kp->addr;
1177                 for (i = 0; i < tp->nr_args; i++)
1178                         entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1179                 perf_tp_event(call->id, entry->ip, 1, entry, size);
1180         } while (0);
1181         return 0;
1182 }
1183
1184 /* Kretprobe profile handler */
1185 static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1186                                             struct pt_regs *regs)
1187 {
1188         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1189         struct ftrace_event_call *call = &tp->call;
1190         struct kretprobe_trace_entry *entry;
1191         int size, __size, i, pc;
1192         unsigned long irq_flags;
1193
1194         local_save_flags(irq_flags);
1195         pc = preempt_count();
1196
1197         __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1198         size = ALIGN(__size + sizeof(u32), sizeof(u64));
1199         size -= sizeof(u32);
1200
1201         do {
1202                 char raw_data[size];
1203                 struct trace_entry *ent;
1204
1205                 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1206                 entry = (struct kretprobe_trace_entry *)raw_data;
1207                 ent = &entry->ent;
1208
1209                 tracing_generic_entry_update(ent, irq_flags, pc);
1210                 ent->type = call->id;
1211                 entry->nargs = tp->nr_args;
1212                 entry->func = (unsigned long)tp->rp.kp.addr;
1213                 entry->ret_ip = (unsigned long)ri->ret_addr;
1214                 for (i = 0; i < tp->nr_args; i++)
1215                         entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1216                 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1217         } while (0);
1218         return 0;
1219 }
1220
1221 static int probe_profile_enable(struct ftrace_event_call *call)
1222 {
1223         struct trace_probe *tp = (struct trace_probe *)call->data;
1224
1225         tp->flags |= TP_FLAG_PROFILE;
1226
1227         if (probe_is_return(tp))
1228                 return enable_kretprobe(&tp->rp);
1229         else
1230                 return enable_kprobe(&tp->rp.kp);
1231 }
1232
1233 static void probe_profile_disable(struct ftrace_event_call *call)
1234 {
1235         struct trace_probe *tp = (struct trace_probe *)call->data;
1236
1237         tp->flags &= ~TP_FLAG_PROFILE;
1238
1239         if (!(tp->flags & TP_FLAG_TRACE)) {
1240                 if (probe_is_return(tp))
1241                         disable_kretprobe(&tp->rp);
1242                 else
1243                         disable_kprobe(&tp->rp.kp);
1244         }
1245 }
1246 #endif  /* CONFIG_EVENT_PROFILE */
1247
1248
1249 static __kprobes
1250 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1251 {
1252         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1253
1254         if (tp->flags & TP_FLAG_TRACE)
1255                 kprobe_trace_func(kp, regs);
1256 #ifdef CONFIG_EVENT_PROFILE
1257         if (tp->flags & TP_FLAG_PROFILE)
1258                 kprobe_profile_func(kp, regs);
1259 #endif  /* CONFIG_EVENT_PROFILE */
1260         return 0;       /* We don't tweek kernel, so just return 0 */
1261 }
1262
1263 static __kprobes
1264 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1265 {
1266         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1267
1268         if (tp->flags & TP_FLAG_TRACE)
1269                 kretprobe_trace_func(ri, regs);
1270 #ifdef CONFIG_EVENT_PROFILE
1271         if (tp->flags & TP_FLAG_PROFILE)
1272                 kretprobe_profile_func(ri, regs);
1273 #endif  /* CONFIG_EVENT_PROFILE */
1274         return 0;       /* We don't tweek kernel, so just return 0 */
1275 }
1276
1277 static int register_probe_event(struct trace_probe *tp)
1278 {
1279         struct ftrace_event_call *call = &tp->call;
1280         int ret;
1281
1282         /* Initialize ftrace_event_call */
1283         if (probe_is_return(tp)) {
1284                 tp->event.trace = print_kretprobe_event;
1285                 call->raw_init = probe_event_raw_init;
1286                 call->show_format = kretprobe_event_show_format;
1287                 call->define_fields = kretprobe_event_define_fields;
1288         } else {
1289                 tp->event.trace = print_kprobe_event;
1290                 call->raw_init = probe_event_raw_init;
1291                 call->show_format = kprobe_event_show_format;
1292                 call->define_fields = kprobe_event_define_fields;
1293         }
1294         call->event = &tp->event;
1295         call->id = register_ftrace_event(&tp->event);
1296         if (!call->id)
1297                 return -ENODEV;
1298         call->enabled = 0;
1299         call->regfunc = probe_event_enable;
1300         call->unregfunc = probe_event_disable;
1301
1302 #ifdef CONFIG_EVENT_PROFILE
1303         atomic_set(&call->profile_count, -1);
1304         call->profile_enable = probe_profile_enable;
1305         call->profile_disable = probe_profile_disable;
1306 #endif
1307         call->data = tp;
1308         ret = trace_add_event_call(call);
1309         if (ret) {
1310                 pr_info("Failed to register kprobe event: %s\n", call->name);
1311                 unregister_ftrace_event(&tp->event);
1312         }
1313         return ret;
1314 }
1315
1316 static void unregister_probe_event(struct trace_probe *tp)
1317 {
1318         /* tp->event is unregistered in trace_remove_event_call() */
1319         trace_remove_event_call(&tp->call);
1320 }
1321
1322 /* Make a debugfs interface for controling probe points */
1323 static __init int init_kprobe_trace(void)
1324 {
1325         struct dentry *d_tracer;
1326         struct dentry *entry;
1327
1328         d_tracer = tracing_init_dentry();
1329         if (!d_tracer)
1330                 return 0;
1331
1332         entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1333                                     NULL, &kprobe_events_ops);
1334
1335         /* Event list interface */
1336         if (!entry)
1337                 pr_warning("Could not create debugfs "
1338                            "'kprobe_events' entry\n");
1339
1340         /* Profile interface */
1341         entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1342                                     NULL, &kprobe_profile_ops);
1343
1344         if (!entry)
1345                 pr_warning("Could not create debugfs "
1346                            "'kprobe_profile' entry\n");
1347         return 0;
1348 }
1349 fs_initcall(init_kprobe_trace);
1350
1351
1352 #ifdef CONFIG_FTRACE_STARTUP_TEST
1353
/* Probe target for the startup self test: returns the sum of its arguments. */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
                                        int a4, int a5, int a6)
{
        int sum = a1;

        sum += a2;
        sum += a3;
        sum += a4;
        sum += a5;
        sum += a6;

        return sum;
}
1359
1360 static __init int kprobe_trace_self_tests_init(void)
1361 {
1362         int ret;
1363         int (*target)(int, int, int, int, int, int);
1364
1365         target = kprobe_trace_selftest_target;
1366
1367         pr_info("Testing kprobe tracing: ");
1368
1369         ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1370                                   "a1 a2 a3 a4 a5 a6");
1371         if (WARN_ON_ONCE(ret))
1372                 pr_warning("error enabling function entry\n");
1373
1374         ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1375                                   "ra rv");
1376         if (WARN_ON_ONCE(ret))
1377                 pr_warning("error enabling function return\n");
1378
1379         ret = target(1, 2, 3, 4, 5, 6);
1380
1381         cleanup_all_probes();
1382
1383         pr_cont("OK\n");
1384         return 0;
1385 }
1386
1387 late_initcall(kprobe_trace_self_tests_init);
1388
1389 #endif