44dad1aa95d3c472f554b8bb787a639ee5c54cc6
[safe/jmp/linux-2.6] / kernel / trace / trace_kprobe.c
1 /*
2  * kprobe based kernel tracer
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31 #include <linux/perf_counter.h>
32
33 #include "trace.h"
34 #include "trace_output.h"
35
/* Upper bound on the number of fetch arguments parsed for one probe. */
#define MAX_TRACE_ARGS		128
/* Longest accepted fetch-argument string, not counting the trailing NUL. */
#define MAX_ARGSTR_LEN		63
/* Size of the buffer used when an event name is generated automatically. */
#define MAX_EVENT_NAME_LEN	64
39
40 /* currently, trace_kprobe only supports X86. */
41
/*
 * One fetch method: @func extracts a single value from the probed context
 * and receives @data back as its opaque, method-specific parameter.
 */
struct fetch_func {
	unsigned long	(*func)(struct pt_regs *, void *);
	void		*data;
};
46
47 static __kprobes unsigned long call_fetch(struct fetch_func *f,
48                                           struct pt_regs *regs)
49 {
50         return f->func(regs, f->data);
51 }
52
53 /* fetch handlers */
54 static __kprobes unsigned long fetch_register(struct pt_regs *regs,
55                                               void *offset)
56 {
57         return regs_get_register(regs, (unsigned int)((unsigned long)offset));
58 }
59
60 static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
61                                            void *num)
62 {
63         return regs_get_kernel_stack_nth(regs,
64                                          (unsigned int)((unsigned long)num));
65 }
66
67 static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
68 {
69         unsigned long retval;
70
71         if (probe_kernel_address(addr, retval))
72                 return 0;
73         return retval;
74 }
75
76 static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
77 {
78         return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
79 }
80
81 static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
82                                               void *dummy)
83 {
84         return regs_return_value(regs);
85 }
86
87 static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy)
88 {
89         return instruction_pointer(regs);
90 }
91
92 static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
93                                                    void *dummy)
94 {
95         return kernel_stack_pointer(regs);
96 }
97
98 /* Memory fetching by symbol */
/* Cached resolution of "symbol + offset" used by the fetch_symbol method. */
struct symbol_cache {
	char		*symbol;	/* heap copy of the symbol name */
	long		offset;		/* offset added to the symbol address */
	unsigned long	addr;		/* resolved address, 0 if lookup failed */
};
104
105 static unsigned long update_symbol_cache(struct symbol_cache *sc)
106 {
107         sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
108         if (sc->addr)
109                 sc->addr += sc->offset;
110         return sc->addr;
111 }
112
113 static void free_symbol_cache(struct symbol_cache *sc)
114 {
115         kfree(sc->symbol);
116         kfree(sc);
117 }
118
119 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
120 {
121         struct symbol_cache *sc;
122
123         if (!sym || strlen(sym) == 0)
124                 return NULL;
125         sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
126         if (!sc)
127                 return NULL;
128
129         sc->symbol = kstrdup(sym, GFP_KERNEL);
130         if (!sc->symbol) {
131                 kfree(sc);
132                 return NULL;
133         }
134         sc->offset = offset;
135
136         update_symbol_cache(sc);
137         return sc;
138 }
139
140 static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
141 {
142         struct symbol_cache *sc = data;
143
144         if (sc->addr)
145                 return fetch_memory(regs, (void *)sc->addr);
146         else
147                 return 0;
148 }
149
150 /* Special indirect memory access interface */
151 struct indirect_fetch_data {
152         struct fetch_func orig;
153         long offset;
154 };
155
156 static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
157 {
158         struct indirect_fetch_data *ind = data;
159         unsigned long addr;
160
161         addr = call_fetch(&ind->orig, regs);
162         if (addr) {
163                 addr += ind->offset;
164                 return fetch_memory(regs, (void *)addr);
165         } else
166                 return 0;
167 }
168
169 static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
170 {
171         if (data->orig.func == fetch_indirect)
172                 free_indirect_fetch_data(data->orig.data);
173         else if (data->orig.func == fetch_symbol)
174                 free_symbol_cache(data->orig.data);
175         kfree(data);
176 }
177
178 /**
179  * Kprobe tracer core functions
180  */
181
182 struct probe_arg {
183         struct fetch_func       fetch;
184         const char              *name;
185 };
186
187 struct trace_probe {
188         struct list_head        list;
189         struct kretprobe        rp;     /* Use rp.kp for kprobe use */
190         unsigned long           nhit;
191         const char              *symbol;        /* symbol name */
192         struct ftrace_event_call        call;
193         struct trace_event              event;
194         unsigned int            nr_args;
195         struct probe_arg        args[];
196 };
197
/* Bytes needed for a struct trace_probe followed by @n probe_arg slots. */
#define SIZEOF_TRACE_PROBE(n)					\
	(offsetof(struct trace_probe, args) +			\
	 sizeof(struct probe_arg) * (n))
201
/* Probe handlers; declared here because alloc_trace_probe() installs them. */
static int kprobe_trace_func(struct kprobe *kp,
			     struct pt_regs *regs);
static int kretprobe_trace_func(struct kretprobe_instance *ri,
				struct pt_regs *regs);
205
206 static __kprobes int probe_is_return(struct trace_probe *tp)
207 {
208         return tp->rp.handler != NULL;
209 }
210
211 static __kprobes const char *probe_symbol(struct trace_probe *tp)
212 {
213         return tp->symbol ? tp->symbol : "unknown";
214 }
215
216 static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
217 {
218         int ret = -EINVAL;
219
220         if (ff->func == fetch_argument)
221                 ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data);
222         else if (ff->func == fetch_register) {
223                 const char *name;
224                 name = regs_query_register_name((unsigned int)((long)ff->data));
225                 ret = snprintf(buf, n, "%%%s", name);
226         } else if (ff->func == fetch_stack)
227                 ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data);
228         else if (ff->func == fetch_memory)
229                 ret = snprintf(buf, n, "@0x%p", ff->data);
230         else if (ff->func == fetch_symbol) {
231                 struct symbol_cache *sc = ff->data;
232                 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
233         } else if (ff->func == fetch_retvalue)
234                 ret = snprintf(buf, n, "rv");
235         else if (ff->func == fetch_ip)
236                 ret = snprintf(buf, n, "ra");
237         else if (ff->func == fetch_stack_address)
238                 ret = snprintf(buf, n, "sa");
239         else if (ff->func == fetch_indirect) {
240                 struct indirect_fetch_data *id = ff->data;
241                 size_t l = 0;
242                 ret = snprintf(buf, n, "%+ld(", id->offset);
243                 if (ret >= n)
244                         goto end;
245                 l += ret;
246                 ret = probe_arg_string(buf + l, n - l, &id->orig);
247                 if (ret < 0)
248                         goto end;
249                 l += ret;
250                 ret = snprintf(buf + l, n - l, ")");
251                 ret += l;
252         }
253 end:
254         if (ret >= n)
255                 return -ENOSPC;
256         return ret;
257 }
258
/* Trace event (un)registration helpers; only declared at this point. */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* All registered probes live on probe_list, which probe_lock protects. */
static LIST_HEAD(probe_list);
static DEFINE_MUTEX(probe_lock);
264
265 /*
266  * Allocate new trace_probe and initialize it (including kprobes).
267  */
268 static struct trace_probe *alloc_trace_probe(const char *event,
269                                              void *addr,
270                                              const char *symbol,
271                                              unsigned long offs,
272                                              int nargs, int is_return)
273 {
274         struct trace_probe *tp;
275
276         tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
277         if (!tp)
278                 return ERR_PTR(-ENOMEM);
279
280         if (symbol) {
281                 tp->symbol = kstrdup(symbol, GFP_KERNEL);
282                 if (!tp->symbol)
283                         goto error;
284                 tp->rp.kp.symbol_name = tp->symbol;
285                 tp->rp.kp.offset = offs;
286         } else
287                 tp->rp.kp.addr = addr;
288
289         /* Set handler here for checking whether this probe is return or not. */
290         if (is_return)
291                 tp->rp.handler = kretprobe_trace_func;
292         else
293                 tp->rp.kp.pre_handler = kprobe_trace_func;
294
295         if (!event)
296                 goto error;
297         tp->call.name = kstrdup(event, GFP_KERNEL);
298         if (!tp->call.name)
299                 goto error;
300
301         INIT_LIST_HEAD(&tp->list);
302         return tp;
303 error:
304         kfree(tp->symbol);
305         kfree(tp);
306         return ERR_PTR(-ENOMEM);
307 }
308
309 static void free_probe_arg(struct probe_arg *arg)
310 {
311         if (arg->fetch.func == fetch_symbol)
312                 free_symbol_cache(arg->fetch.data);
313         else if (arg->fetch.func == fetch_indirect)
314                 free_indirect_fetch_data(arg->fetch.data);
315         kfree(arg->name);
316 }
317
318 static void free_trace_probe(struct trace_probe *tp)
319 {
320         int i;
321
322         for (i = 0; i < tp->nr_args; i++)
323                 free_probe_arg(&tp->args[i]);
324
325         kfree(tp->call.name);
326         kfree(tp->symbol);
327         kfree(tp);
328 }
329
330 static struct trace_probe *find_probe_event(const char *event)
331 {
332         struct trace_probe *tp;
333
334         list_for_each_entry(tp, &probe_list, list)
335                 if (!strcmp(tp->call.name, event))
336                         return tp;
337         return NULL;
338 }
339
340 static void __unregister_trace_probe(struct trace_probe *tp)
341 {
342         if (probe_is_return(tp))
343                 unregister_kretprobe(&tp->rp);
344         else
345                 unregister_kprobe(&tp->rp.kp);
346 }
347
348 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
349 static void unregister_trace_probe(struct trace_probe *tp)
350 {
351         unregister_probe_event(tp);
352         __unregister_trace_probe(tp);
353         list_del(&tp->list);
354 }
355
356 /* Register a trace_probe and probe_event */
357 static int register_trace_probe(struct trace_probe *tp)
358 {
359         struct trace_probe *old_tp;
360         int ret;
361
362         mutex_lock(&probe_lock);
363
364         if (probe_is_return(tp))
365                 ret = register_kretprobe(&tp->rp);
366         else
367                 ret = register_kprobe(&tp->rp.kp);
368
369         if (ret) {
370                 pr_warning("Could not insert probe(%d)\n", ret);
371                 if (ret == -EILSEQ) {
372                         pr_warning("Probing address(0x%p) is not an "
373                                    "instruction boundary.\n",
374                                    tp->rp.kp.addr);
375                         ret = -EINVAL;
376                 }
377                 goto end;
378         }
379         /* register as an event */
380         old_tp = find_probe_event(tp->call.name);
381         if (old_tp) {
382                 /* delete old event */
383                 unregister_trace_probe(old_tp);
384                 free_trace_probe(old_tp);
385         }
386         ret = register_probe_event(tp);
387         if (ret) {
388                 pr_warning("Faild to register probe event(%d)\n", ret);
389                 __unregister_trace_probe(tp);
390         }
391         list_add_tail(&tp->list, &probe_list);
392 end:
393         mutex_unlock(&probe_lock);
394         return ret;
395 }
396
397 /* Split symbol and offset. */
398 static int split_symbol_offset(char *symbol, unsigned long *offset)
399 {
400         char *tmp;
401         int ret;
402
403         if (!offset)
404                 return -EINVAL;
405
406         tmp = strchr(symbol, '+');
407         if (tmp) {
408                 /* skip sign because strict_strtol doesn't accept '+' */
409                 ret = strict_strtoul(tmp + 1, 0, offset);
410                 if (ret)
411                         return ret;
412                 *tmp = '\0';
413         } else
414                 *offset = 0;
415         return 0;
416 }
417
/* Largest index accepted for an "aN" (function argument) fetch. */
#define PARAM_MAX_ARGS		16
/* Largest index accepted for an "sN" (stack entry) fetch. */
#define PARAM_MAX_STACK		(THREAD_SIZE / sizeof(unsigned long))
420
421 static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
422 {
423         int ret = 0;
424         unsigned long param;
425         long offset;
426         char *tmp;
427
428         switch (arg[0]) {
429         case 'a':       /* argument */
430                 ret = strict_strtoul(arg + 1, 10, &param);
431                 if (ret || param > PARAM_MAX_ARGS)
432                         ret = -EINVAL;
433                 else {
434                         ff->func = fetch_argument;
435                         ff->data = (void *)param;
436                 }
437                 break;
438         case 'r':       /* retval or retaddr */
439                 if (is_return && arg[1] == 'v') {
440                         ff->func = fetch_retvalue;
441                         ff->data = NULL;
442                 } else if (is_return && arg[1] == 'a') {
443                         ff->func = fetch_ip;
444                         ff->data = NULL;
445                 } else
446                         ret = -EINVAL;
447                 break;
448         case '%':       /* named register */
449                 ret = regs_query_register_offset(arg + 1);
450                 if (ret >= 0) {
451                         ff->func = fetch_register;
452                         ff->data = (void *)(unsigned long)ret;
453                         ret = 0;
454                 }
455                 break;
456         case 's':       /* stack */
457                 if (arg[1] == 'a') {
458                         ff->func = fetch_stack_address;
459                         ff->data = NULL;
460                 } else {
461                         ret = strict_strtoul(arg + 1, 10, &param);
462                         if (ret || param > PARAM_MAX_STACK)
463                                 ret = -EINVAL;
464                         else {
465                                 ff->func = fetch_stack;
466                                 ff->data = (void *)param;
467                         }
468                 }
469                 break;
470         case '@':       /* memory or symbol */
471                 if (isdigit(arg[1])) {
472                         ret = strict_strtoul(arg + 1, 0, &param);
473                         if (ret)
474                                 break;
475                         ff->func = fetch_memory;
476                         ff->data = (void *)param;
477                 } else {
478                         ret = split_symbol_offset(arg + 1, &offset);
479                         if (ret)
480                                 break;
481                         ff->data = alloc_symbol_cache(arg + 1,
482                                                               offset);
483                         if (ff->data)
484                                 ff->func = fetch_symbol;
485                         else
486                                 ret = -EINVAL;
487                 }
488                 break;
489         case '+':       /* indirect memory */
490         case '-':
491                 tmp = strchr(arg, '(');
492                 if (!tmp) {
493                         ret = -EINVAL;
494                         break;
495                 }
496                 *tmp = '\0';
497                 ret = strict_strtol(arg + 1, 0, &offset);
498                 if (ret)
499                         break;
500                 if (arg[0] == '-')
501                         offset = -offset;
502                 arg = tmp + 1;
503                 tmp = strrchr(arg, ')');
504                 if (tmp) {
505                         struct indirect_fetch_data *id;
506                         *tmp = '\0';
507                         id = kzalloc(sizeof(struct indirect_fetch_data),
508                                      GFP_KERNEL);
509                         if (!id)
510                                 return -ENOMEM;
511                         id->offset = offset;
512                         ret = parse_probe_arg(arg, &id->orig, is_return);
513                         if (ret)
514                                 kfree(id);
515                         else {
516                                 ff->func = fetch_indirect;
517                                 ff->data = (void *)id;
518                         }
519                 } else
520                         ret = -EINVAL;
521                 break;
522         default:
523                 /* TODO: support custom handler */
524                 ret = -EINVAL;
525         }
526         return ret;
527 }
528
529 static int create_trace_probe(int argc, char **argv)
530 {
531         /*
532          * Argument syntax:
533          *  - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS]
534          *  - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
535          * Fetch args:
536          *  aN  : fetch Nth of function argument. (N:0-)
537          *  rv  : fetch return value
538          *  ra  : fetch return address
539          *  sa  : fetch stack address
540          *  sN  : fetch Nth of stack (N:0-)
541          *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
542          *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
543          *  %REG        : fetch register REG
544          * Indirect memory fetch:
545          *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
546          * Alias name of args:
547          *  NAME=FETCHARG : set NAME as alias of FETCHARG.
548          */
549         struct trace_probe *tp;
550         int i, ret = 0;
551         int is_return = 0;
552         char *symbol = NULL, *event = NULL, *arg = NULL;
553         unsigned long offset = 0;
554         void *addr = NULL;
555         char buf[MAX_EVENT_NAME_LEN];
556
557         if (argc < 2)
558                 return -EINVAL;
559
560         if (argv[0][0] == 'p')
561                 is_return = 0;
562         else if (argv[0][0] == 'r')
563                 is_return = 1;
564         else
565                 return -EINVAL;
566
567         if (argv[0][1] == ':') {
568                 event = &argv[0][2];
569                 if (strlen(event) == 0) {
570                         pr_info("Event name is not specifiled\n");
571                         return -EINVAL;
572                 }
573         }
574
575         if (isdigit(argv[1][0])) {
576                 if (is_return)
577                         return -EINVAL;
578                 /* an address specified */
579                 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
580                 if (ret)
581                         return ret;
582         } else {
583                 /* a symbol specified */
584                 symbol = argv[1];
585                 /* TODO: support .init module functions */
586                 ret = split_symbol_offset(symbol, &offset);
587                 if (ret)
588                         return ret;
589                 if (offset && is_return)
590                         return -EINVAL;
591         }
592         argc -= 2; argv += 2;
593
594         /* setup a probe */
595         if (!event) {
596                 /* Make a new event name */
597                 if (symbol)
598                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
599                                  is_return ? 'r' : 'p', symbol, offset);
600                 else
601                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
602                                  is_return ? 'r' : 'p', addr);
603                 event = buf;
604         }
605         tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return);
606         if (IS_ERR(tp))
607                 return PTR_ERR(tp);
608
609         /* parse arguments */
610         ret = 0;
611         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
612                 /* Parse argument name */
613                 arg = strchr(argv[i], '=');
614                 if (arg)
615                         *arg++ = '\0';
616                 else
617                         arg = argv[i];
618                 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
619
620                 /* Parse fetch argument */
621                 if (strlen(arg) > MAX_ARGSTR_LEN) {
622                         pr_info("Argument%d(%s) is too long.\n", i, arg);
623                         ret = -ENOSPC;
624                         goto error;
625                 }
626                 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
627                 if (ret)
628                         goto error;
629         }
630         tp->nr_args = i;
631
632         ret = register_trace_probe(tp);
633         if (ret)
634                 goto error;
635         return 0;
636
637 error:
638         free_trace_probe(tp);
639         return ret;
640 }
641
642 static void cleanup_all_probes(void)
643 {
644         struct trace_probe *tp;
645
646         mutex_lock(&probe_lock);
647         /* TODO: Use batch unregistration */
648         while (!list_empty(&probe_list)) {
649                 tp = list_entry(probe_list.next, struct trace_probe, list);
650                 unregister_trace_probe(tp);
651                 free_trace_probe(tp);
652         }
653         mutex_unlock(&probe_lock);
654 }
655
656
657 /* Probes listing interfaces */
658 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
659 {
660         mutex_lock(&probe_lock);
661         return seq_list_start(&probe_list, *pos);
662 }
663
664 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
665 {
666         return seq_list_next(v, &probe_list, pos);
667 }
668
669 static void probes_seq_stop(struct seq_file *m, void *v)
670 {
671         mutex_unlock(&probe_lock);
672 }
673
674 static int probes_seq_show(struct seq_file *m, void *v)
675 {
676         struct trace_probe *tp = v;
677         int i, ret;
678         char buf[MAX_ARGSTR_LEN + 1];
679
680         seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
681         seq_printf(m, ":%s", tp->call.name);
682
683         if (tp->symbol)
684                 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
685         else
686                 seq_printf(m, " 0x%p", tp->rp.kp.addr);
687
688         for (i = 0; i < tp->nr_args; i++) {
689                 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
690                 if (ret < 0) {
691                         pr_warning("Argument%d decoding error(%d).\n", i, ret);
692                         return ret;
693                 }
694                 seq_printf(m, " %s=%s", tp->args[i].name, buf);
695         }
696         seq_printf(m, "\n");
697         return 0;
698 }
699
700 static const struct seq_operations probes_seq_op = {
701         .start  = probes_seq_start,
702         .next   = probes_seq_next,
703         .stop   = probes_seq_stop,
704         .show   = probes_seq_show
705 };
706
707 static int probes_open(struct inode *inode, struct file *file)
708 {
709         if ((file->f_mode & FMODE_WRITE) &&
710             (file->f_flags & O_TRUNC))
711                 cleanup_all_probes();
712
713         return seq_open(file, &probes_seq_op);
714 }
715
716 static int command_trace_probe(const char *buf)
717 {
718         char **argv;
719         int argc = 0, ret = 0;
720
721         argv = argv_split(GFP_KERNEL, buf, &argc);
722         if (!argv)
723                 return -ENOMEM;
724
725         if (argc)
726                 ret = create_trace_probe(argc, argv);
727
728         argv_free(argv);
729         return ret;
730 }
731
/* Size of the kernel buffer that holds one command line in probes_write(). */
#define WRITE_BUFSIZE		128
733
734 static ssize_t probes_write(struct file *file, const char __user *buffer,
735                             size_t count, loff_t *ppos)
736 {
737         char *kbuf, *tmp;
738         int ret;
739         size_t done;
740         size_t size;
741
742         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
743         if (!kbuf)
744                 return -ENOMEM;
745
746         ret = done = 0;
747         while (done < count) {
748                 size = count - done;
749                 if (size >= WRITE_BUFSIZE)
750                         size = WRITE_BUFSIZE - 1;
751                 if (copy_from_user(kbuf, buffer + done, size)) {
752                         ret = -EFAULT;
753                         goto out;
754                 }
755                 kbuf[size] = '\0';
756                 tmp = strchr(kbuf, '\n');
757                 if (tmp) {
758                         *tmp = '\0';
759                         size = tmp - kbuf + 1;
760                 } else if (done + size < count) {
761                         pr_warning("Line length is too long: "
762                                    "Should be less than %d.", WRITE_BUFSIZE);
763                         ret = -EINVAL;
764                         goto out;
765                 }
766                 done += size;
767                 /* Remove comments */
768                 tmp = strchr(kbuf, '#');
769                 if (tmp)
770                         *tmp = '\0';
771
772                 ret = command_trace_probe(kbuf);
773                 if (ret)
774                         goto out;
775         }
776         ret = done;
777 out:
778         kfree(kbuf);
779         return ret;
780 }
781
782 static const struct file_operations kprobe_events_ops = {
783         .owner          = THIS_MODULE,
784         .open           = probes_open,
785         .read           = seq_read,
786         .llseek         = seq_lseek,
787         .release        = seq_release,
788         .write          = probes_write,
789 };
790
791 /* Probes profiling interfaces */
792 static int probes_profile_seq_show(struct seq_file *m, void *v)
793 {
794         struct trace_probe *tp = v;
795
796         seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
797                    tp->rp.kp.nmissed);
798
799         return 0;
800 }
801
802 static const struct seq_operations profile_seq_op = {
803         .start  = probes_seq_start,
804         .next   = probes_seq_next,
805         .stop   = probes_seq_stop,
806         .show   = probes_profile_seq_show
807 };
808
809 static int profile_open(struct inode *inode, struct file *file)
810 {
811         return seq_open(file, &profile_seq_op);
812 }
813
814 static const struct file_operations kprobe_profile_ops = {
815         .owner          = THIS_MODULE,
816         .open           = profile_open,
817         .read           = seq_read,
818         .llseek         = seq_lseek,
819         .release        = seq_release,
820 };
821
822 /* Kprobe handler */
823 static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
824 {
825         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
826         struct kprobe_trace_entry *entry;
827         struct ring_buffer_event *event;
828         struct ring_buffer *buffer;
829         int size, i, pc;
830         unsigned long irq_flags;
831         struct ftrace_event_call *call = &tp->call;
832
833         tp->nhit++;
834
835         local_save_flags(irq_flags);
836         pc = preempt_count();
837
838         size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
839
840         event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
841                                                   irq_flags, pc);
842         if (!event)
843                 return 0;
844
845         entry = ring_buffer_event_data(event);
846         entry->nargs = tp->nr_args;
847         entry->ip = (unsigned long)kp->addr;
848         for (i = 0; i < tp->nr_args; i++)
849                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
850
851         if (!filter_current_check_discard(buffer, call, entry, event))
852                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
853         return 0;
854 }
855
856 /* Kretprobe handler */
857 static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
858                                           struct pt_regs *regs)
859 {
860         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
861         struct kretprobe_trace_entry *entry;
862         struct ring_buffer_event *event;
863         struct ring_buffer *buffer;
864         int size, i, pc;
865         unsigned long irq_flags;
866         struct ftrace_event_call *call = &tp->call;
867
868         local_save_flags(irq_flags);
869         pc = preempt_count();
870
871         size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
872
873         event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
874                                                   irq_flags, pc);
875         if (!event)
876                 return 0;
877
878         entry = ring_buffer_event_data(event);
879         entry->nargs = tp->nr_args;
880         entry->func = (unsigned long)tp->rp.kp.addr;
881         entry->ret_ip = (unsigned long)ri->ret_addr;
882         for (i = 0; i < tp->nr_args; i++)
883                 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
884
885         if (!filter_current_check_discard(buffer, call, entry, event))
886                 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
887
888         return 0;
889 }
890
891 /* Event entry printers */
892 enum print_line_t
893 print_kprobe_event(struct trace_iterator *iter, int flags)
894 {
895         struct kprobe_trace_entry *field;
896         struct trace_seq *s = &iter->seq;
897         struct trace_event *event;
898         struct trace_probe *tp;
899         int i;
900
901         field = (struct kprobe_trace_entry *)iter->ent;
902         event = ftrace_find_event(field->ent.type);
903         tp = container_of(event, struct trace_probe, event);
904
905         if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
906                 goto partial;
907
908         if (!trace_seq_puts(s, ":"))
909                 goto partial;
910
911         for (i = 0; i < field->nargs; i++)
912                 if (!trace_seq_printf(s, " %s=%lx",
913                                       tp->args[i].name, field->args[i]))
914                         goto partial;
915
916         if (!trace_seq_puts(s, "\n"))
917                 goto partial;
918
919         return TRACE_TYPE_HANDLED;
920 partial:
921         return TRACE_TYPE_PARTIAL_LINE;
922 }
923
924 enum print_line_t
925 print_kretprobe_event(struct trace_iterator *iter, int flags)
926 {
927         struct kretprobe_trace_entry *field;
928         struct trace_seq *s = &iter->seq;
929         struct trace_event *event;
930         struct trace_probe *tp;
931         int i;
932
933         field = (struct kretprobe_trace_entry *)iter->ent;
934         event = ftrace_find_event(field->ent.type);
935         tp = container_of(event, struct trace_probe, event);
936
937         if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
938                 goto partial;
939
940         if (!trace_seq_puts(s, " <- "))
941                 goto partial;
942
943         if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
944                 goto partial;
945
946         if (!trace_seq_puts(s, ":"))
947                 goto partial;
948
949         for (i = 0; i < field->nargs; i++)
950                 if (!trace_seq_printf(s, " %s=%lx",
951                                       tp->args[i].name, field->args[i]))
952                         goto partial;
953
954         if (!trace_seq_puts(s, "\n"))
955                 goto partial;
956
957         return TRACE_TYPE_HANDLED;
958 partial:
959         return TRACE_TYPE_PARTIAL_LINE;
960 }
961
962 static int probe_event_enable(struct ftrace_event_call *call)
963 {
964         struct trace_probe *tp = (struct trace_probe *)call->data;
965
966         if (probe_is_return(tp)) {
967                 tp->rp.handler = kretprobe_trace_func;
968                 return enable_kretprobe(&tp->rp);
969         } else {
970                 tp->rp.kp.pre_handler = kprobe_trace_func;
971                 return enable_kprobe(&tp->rp.kp);
972         }
973 }
974
975 static void probe_event_disable(struct ftrace_event_call *call)
976 {
977         struct trace_probe *tp = (struct trace_probe *)call->data;
978
979         if (probe_is_return(tp))
980                 disable_kretprobe(&tp->rp);
981         else
982                 disable_kprobe(&tp->rp.kp);
983 }
984
985 static int probe_event_raw_init(struct ftrace_event_call *event_call)
986 {
987         INIT_LIST_HEAD(&event_call->fields);
988
989         return 0;
990 }
991
/*
 * Register one member of the local variable 'field' as an event field named
 * 'name'.  Expects 'ret', 'event_call' and 'field' to exist in the calling
 * scope, and makes the *calling function* return 'ret' when
 * trace_define_field() yields a non-zero value.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call,			\
					 #type,				\
					 name,				\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item),		\
					 is_signed,			\
					 FILTER_OTHER);			\
		if (ret != 0)						\
			return ret;					\
	} while (0)
1002
1003 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1004 {
1005         int ret, i;
1006         struct kprobe_trace_entry field;
1007         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1008
1009         ret = trace_define_common_fields(event_call);
1010         if (!ret)
1011                 return ret;
1012
1013         DEFINE_FIELD(unsigned long, ip, "ip", 0);
1014         DEFINE_FIELD(int, nargs, "nargs", 1);
1015         /* Set argument names as fields */
1016         for (i = 0; i < tp->nr_args; i++)
1017                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1018         return 0;
1019 }
1020
1021 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1022 {
1023         int ret, i;
1024         struct kretprobe_trace_entry field;
1025         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1026
1027         ret = trace_define_common_fields(event_call);
1028         if (!ret)
1029                 return ret;
1030
1031         DEFINE_FIELD(unsigned long, func, "func", 0);
1032         DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
1033         DEFINE_FIELD(int, nargs, "nargs", 1);
1034         /* Set argument names as fields */
1035         for (i = 0; i < tp->nr_args; i++)
1036                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1037         return 0;
1038 }
1039
1040 static int __probe_event_show_format(struct trace_seq *s,
1041                                      struct trace_probe *tp, const char *fmt,
1042                                      const char *arg)
1043 {
1044         int i;
1045
1046         /* Show format */
1047         if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1048                 return 0;
1049
1050         for (i = 0; i < tp->nr_args; i++)
1051                 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1052                         return 0;
1053
1054         if (!trace_seq_printf(s, "\", %s", arg))
1055                 return 0;
1056
1057         for (i = 0; i < tp->nr_args; i++)
1058                 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1059                         return 0;
1060
1061         return trace_seq_puts(s, "\n");
1062 }
1063
/*
 * Print one "field:" description line (type, name, offset within the local
 * variable 'field', and sizeof(type)) into the trace_seq 's'.  Expects
 * 'ret', 's' and 'field' to exist in the calling scope, and makes the
 * *calling function* return 0 when the write reports failure.
 */
#undef SHOW_FIELD
#define SHOW_FIELD(type, item, name)					\
	do {								\
		ret = trace_seq_printf(s,				\
				"\tfield: " #type " %s;\t"		\
				"offset:%u;\tsize:%u;\n",		\
				name,					\
				(unsigned int)offsetof(typeof(field), item),\
				(unsigned int)sizeof(type));		\
		if (ret == 0)						\
			return 0;					\
	} while (0)
1074
1075 static int kprobe_event_show_format(struct ftrace_event_call *call,
1076                                     struct trace_seq *s)
1077 {
1078         struct kprobe_trace_entry field __attribute__((unused));
1079         int ret, i;
1080         struct trace_probe *tp = (struct trace_probe *)call->data;
1081
1082         SHOW_FIELD(unsigned long, ip, "ip");
1083         SHOW_FIELD(int, nargs, "nargs");
1084
1085         /* Show fields */
1086         for (i = 0; i < tp->nr_args; i++)
1087                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1088         trace_seq_puts(s, "\n");
1089
1090         return __probe_event_show_format(s, tp, "%lx:", "ip");
1091 }
1092
1093 static int kretprobe_event_show_format(struct ftrace_event_call *call,
1094                                        struct trace_seq *s)
1095 {
1096         struct kretprobe_trace_entry field __attribute__((unused));
1097         int ret, i;
1098         struct trace_probe *tp = (struct trace_probe *)call->data;
1099
1100         SHOW_FIELD(unsigned long, func, "func");
1101         SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
1102         SHOW_FIELD(int, nargs, "nargs");
1103
1104         /* Show fields */
1105         for (i = 0; i < tp->nr_args; i++)
1106                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1107         trace_seq_puts(s, "\n");
1108
1109         return __probe_event_show_format(s, tp, "%lx <- %lx:",
1110                                           "func, ret_ip");
1111 }
1112
1113 #ifdef CONFIG_EVENT_PROFILE
1114
1115 /* Kprobe profile handler */
1116 static __kprobes int kprobe_profile_func(struct kprobe *kp,
1117                                          struct pt_regs *regs)
1118 {
1119         struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1120         struct ftrace_event_call *call = &tp->call;
1121         struct kprobe_trace_entry *entry;
1122         int size, i, pc;
1123         unsigned long irq_flags;
1124
1125         local_save_flags(irq_flags);
1126         pc = preempt_count();
1127
1128         size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1129
1130         do {
1131                 char raw_data[size];
1132                 struct trace_entry *ent;
1133
1134                 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1135                 entry = (struct kprobe_trace_entry *)raw_data;
1136                 ent = &entry->ent;
1137
1138                 tracing_generic_entry_update(ent, irq_flags, pc);
1139                 ent->type = call->id;
1140                 entry->nargs = tp->nr_args;
1141                 entry->ip = (unsigned long)kp->addr;
1142                 for (i = 0; i < tp->nr_args; i++)
1143                         entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1144                 perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
1145         } while (0);
1146         return 0;
1147 }
1148
1149 /* Kretprobe profile handler */
1150 static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1151                                             struct pt_regs *regs)
1152 {
1153         struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1154         struct ftrace_event_call *call = &tp->call;
1155         struct kretprobe_trace_entry *entry;
1156         int size, i, pc;
1157         unsigned long irq_flags;
1158
1159         local_save_flags(irq_flags);
1160         pc = preempt_count();
1161
1162         size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1163
1164         do {
1165                 char raw_data[size];
1166                 struct trace_entry *ent;
1167
1168                 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1169                 entry = (struct kretprobe_trace_entry *)raw_data;
1170                 ent = &entry->ent;
1171
1172                 tracing_generic_entry_update(ent, irq_flags, pc);
1173                 ent->type = call->id;
1174                 entry->nargs = tp->nr_args;
1175                 entry->func = (unsigned long)tp->rp.kp.addr;
1176                 entry->ret_ip = (unsigned long)ri->ret_addr;
1177                 for (i = 0; i < tp->nr_args; i++)
1178                         entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1179                 perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
1180         } while (0);
1181         return 0;
1182 }
1183
1184 static int probe_profile_enable(struct ftrace_event_call *call)
1185 {
1186         struct trace_probe *tp = (struct trace_probe *)call->data;
1187
1188         if (atomic_inc_return(&call->profile_count))
1189                 return 0;
1190
1191         if (probe_is_return(tp)) {
1192                 tp->rp.handler = kretprobe_profile_func;
1193                 return enable_kretprobe(&tp->rp);
1194         } else {
1195                 tp->rp.kp.pre_handler = kprobe_profile_func;
1196                 return enable_kprobe(&tp->rp.kp);
1197         }
1198 }
1199
1200 static void probe_profile_disable(struct ftrace_event_call *call)
1201 {
1202         if (atomic_add_negative(-1, &call->profile_count))
1203                 probe_event_disable(call);
1204 }
1205
1206 #endif  /* CONFIG_EVENT_PROFILE */
1207
1208 static int register_probe_event(struct trace_probe *tp)
1209 {
1210         struct ftrace_event_call *call = &tp->call;
1211         int ret;
1212
1213         /* Initialize ftrace_event_call */
1214         call->system = "kprobes";
1215         if (probe_is_return(tp)) {
1216                 tp->event.trace = print_kretprobe_event;
1217                 call->raw_init = probe_event_raw_init;
1218                 call->show_format = kretprobe_event_show_format;
1219                 call->define_fields = kretprobe_event_define_fields;
1220         } else {
1221                 tp->event.trace = print_kprobe_event;
1222                 call->raw_init = probe_event_raw_init;
1223                 call->show_format = kprobe_event_show_format;
1224                 call->define_fields = kprobe_event_define_fields;
1225         }
1226         call->event = &tp->event;
1227         call->id = register_ftrace_event(&tp->event);
1228         if (!call->id)
1229                 return -ENODEV;
1230         call->enabled = 1;
1231         call->regfunc = probe_event_enable;
1232         call->unregfunc = probe_event_disable;
1233
1234 #ifdef CONFIG_EVENT_PROFILE
1235         atomic_set(&call->profile_count, -1);
1236         call->profile_enable = probe_profile_enable;
1237         call->profile_disable = probe_profile_disable;
1238 #endif
1239         call->data = tp;
1240         ret = trace_add_event_call(call);
1241         if (ret) {
1242                 pr_info("Failed to register kprobe event: %s\n", call->name);
1243                 unregister_ftrace_event(&tp->event);
1244         }
1245         return ret;
1246 }
1247
1248 static void unregister_probe_event(struct trace_probe *tp)
1249 {
1250         /* tp->event is unregistered in trace_remove_event_call() */
1251         trace_remove_event_call(&tp->call);
1252 }
1253
1254 /* Make a debugfs interface for controling probe points */
1255 static __init int init_kprobe_trace(void)
1256 {
1257         struct dentry *d_tracer;
1258         struct dentry *entry;
1259
1260         d_tracer = tracing_init_dentry();
1261         if (!d_tracer)
1262                 return 0;
1263
1264         entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1265                                     NULL, &kprobe_events_ops);
1266
1267         /* Event list interface */
1268         if (!entry)
1269                 pr_warning("Could not create debugfs "
1270                            "'kprobe_events' entry\n");
1271
1272         /* Profile interface */
1273         entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1274                                     NULL, &kprobe_profile_ops);
1275
1276         if (!entry)
1277                 pr_warning("Could not create debugfs "
1278                            "'kprobe_profile' entry\n");
1279         return 0;
1280 }
1281 fs_initcall(init_kprobe_trace);
1282
1283
1284 #ifdef CONFIG_FTRACE_STARTUP_TEST
1285
/* Probe target for the startup self test: returns the sum of its arguments. */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
1291
1292 static __init int kprobe_trace_self_tests_init(void)
1293 {
1294         int ret;
1295         int (*target)(int, int, int, int, int, int);
1296
1297         target = kprobe_trace_selftest_target;
1298
1299         pr_info("Testing kprobe tracing: ");
1300
1301         ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1302                                   "a1 a2 a3 a4 a5 a6");
1303         if (WARN_ON_ONCE(ret))
1304                 pr_warning("error enabling function entry\n");
1305
1306         ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1307                                   "ra rv");
1308         if (WARN_ON_ONCE(ret))
1309                 pr_warning("error enabling function return\n");
1310
1311         ret = target(1, 2, 3, 4, 5, 6);
1312
1313         cleanup_all_probes();
1314
1315         pr_cont("OK\n");
1316         return 0;
1317 }
1318
1319 late_initcall(kprobe_trace_self_tests_init);
1320
1321 #endif