tracing: Pull up calls to trace_define_common_fields()
kernel/trace/trace_events.c
/*
 * event tracer
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 *  - Added format output of fields of the trace point.
 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
 *
 */

#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/delay.h>

#include <asm/setup.h>

#include "trace_output.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);

int trace_define_field(struct ftrace_event_call *call, const char *type,
                       const char *name, int offset, int size, int is_signed,
                       int filter_type)
{
        struct ftrace_event_field *field;

        field = kzalloc(sizeof(*field), GFP_KERNEL);
        if (!field)
                goto err;

        field->name = kstrdup(name, GFP_KERNEL);
        if (!field->name)
                goto err;

        field->type = kstrdup(type, GFP_KERNEL);
        if (!field->type)
                goto err;

        if (filter_type == FILTER_OTHER)
                field->filter_type = filter_assign_type(type);
        else
                field->filter_type = filter_type;

        field->offset = offset;
        field->size = size;
        field->is_signed = is_signed;

        list_add(&field->link, &call->fields);

        return 0;

err:
        if (field) {
                kfree(field->name);
                kfree(field->type);
        }
        kfree(field);

        return -ENOMEM;
}
EXPORT_SYMBOL_GPL(trace_define_field);

#define __common_field(type, item)                                      \
        ret = trace_define_field(call, #type, "common_" #item,          \
                                 offsetof(typeof(ent), item),           \
                                 sizeof(ent.item),                      \
                                 is_signed_type(type), FILTER_OTHER);   \
        if (ret)                                                        \
                return ret;

static int trace_define_common_fields(struct ftrace_event_call *call)
{
        int ret;
        struct trace_entry ent;

        __common_field(unsigned short, type);
        __common_field(unsigned char, flags);
        __common_field(unsigned char, preempt_count);
        __common_field(int, pid);
        __common_field(int, lock_depth);

        return ret;
}
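
/*
 * Every trace entry starts with the common_* fields defined above.
 * Per the commit title, the call to trace_define_common_fields() has
 * been pulled up out of the individual define_fields() callbacks and
 * into event_create_dir() below, which runs it once before invoking
 * call->define_fields().
 */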

void trace_destroy_fields(struct ftrace_event_call *call)
{
        struct ftrace_event_field *field, *next;

        list_for_each_entry_safe(field, next, &call->fields, link) {
                list_del(&field->link);
                kfree(field->type);
                kfree(field->name);
                kfree(field);
        }
}

int trace_event_raw_init(struct ftrace_event_call *call)
{
        int id;

        id = register_ftrace_event(call->event);
        if (!id)
                return -ENODEV;
        call->id = id;
        INIT_LIST_HEAD(&call->fields);

        return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

static void ftrace_event_enable_disable(struct ftrace_event_call *call,
                                        int enable)
{
        switch (enable) {
        case 0:
                if (call->enabled) {
                        call->enabled = 0;
                        tracing_stop_cmdline_record();
                        call->unregfunc(call);
                }
                break;
        case 1:
                if (!call->enabled) {
                        call->enabled = 1;
                        tracing_start_cmdline_record();
                        call->regfunc(call);
                }
                break;
        }
}

static void ftrace_clear_events(void)
{
        struct ftrace_event_call *call;

        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
                ftrace_event_enable_disable(call, 0);
        }
        mutex_unlock(&event_mutex);
}

/*
 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 */
static int __ftrace_set_clr_event(const char *match, const char *sub,
                                  const char *event, int set)
{
        struct ftrace_event_call *call;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {

                if (!call->name || !call->regfunc)
                        continue;

                if (match &&
                    strcmp(match, call->name) != 0 &&
                    strcmp(match, call->system) != 0)
                        continue;

                if (sub && strcmp(sub, call->system) != 0)
                        continue;

                if (event && strcmp(event, call->name) != 0)
                        continue;

                ftrace_event_enable_disable(call, set);

                ret = 0;
        }
        mutex_unlock(&event_mutex);

        return ret;
}

static int ftrace_set_clr_event(char *buf, int set)
{
        char *event = NULL, *sub = NULL, *match;

        /*
         * The buf format can be <subsystem>:<event-name>
         *  *:<event-name> means any event by that name.
         *  :<event-name> is the same.
         *
         *  <subsystem>:* means all events in that subsystem
         *  <subsystem>: means the same.
         *
         *  <name> (no ':') means all events in a subsystem with
         *  the name <name> or any event that matches <name>
         */
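
        /*
         * For example, a write of "sched:sched_switch" enables one
         * event, "sched:" every event in the sched subsystem, and a
         * bare "sched_switch" matches by name across subsystems (the
         * sched names here are illustrative).
         */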

        match = strsep(&buf, ":");
        if (buf) {
                sub = match;
                event = buf;
                match = NULL;

                if (!strlen(sub) || strcmp(sub, "*") == 0)
                        sub = NULL;
                if (!strlen(event) || strcmp(event, "*") == 0)
                        event = NULL;
        }

        return __ftrace_set_clr_event(match, sub, event, set);
}

/**
 * trace_set_clr_event - enable or disable an event
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @set: 1 to enable, 0 to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
 */
int trace_set_clr_event(const char *system, const char *event, int set)
{
        return __ftrace_set_clr_event(NULL, system, event, set);
}

/* 128 should be much more than enough */
#define EVENT_BUF_SIZE          127

static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read, ret;

        if (!cnt)
                return 0;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);

        if (read >= 0 && trace_parser_loaded(&parser)) {
                int set = 1;

                if (*parser.buffer == '!')
                        set = 0;

                parser.buffer[parser.idx] = 0;

                ret = ftrace_set_clr_event(parser.buffer + !set, set);
                if (ret)
                        goto out_put;
        }

        ret = read;

 out_put:
        trace_parser_put(&parser);

        return ret;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct ftrace_event_call *call = v;

        (*pos)++;

        list_for_each_entry_continue(call, &ftrace_events, list) {
                /*
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
                 */
                if (call->regfunc)
                        return call;
        }

        return NULL;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
        struct ftrace_event_call *call;
        loff_t l;

        mutex_lock(&event_mutex);

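        /*
         * Point at the list head itself, cast as a bogus entry, so
         * that t_next()'s list_for_each_entry_continue() starts from
         * the first real event; s_start() below uses the same trick.
         */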
        call = list_entry(&ftrace_events, struct ftrace_event_call, list);
        for (l = 0; l <= *pos; ) {
                call = t_next(m, call, &l);
                if (!call)
                        break;
        }
        return call;
}

static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct ftrace_event_call *call = v;

        (*pos)++;

        list_for_each_entry_continue(call, &ftrace_events, list) {
                if (call->enabled)
                        return call;
        }

        return NULL;
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
        struct ftrace_event_call *call;
        loff_t l;

        mutex_lock(&event_mutex);

        call = list_entry(&ftrace_events, struct ftrace_event_call, list);
        for (l = 0; l <= *pos; ) {
                call = s_next(m, call, &l);
                if (!call)
                        break;
        }
        return call;
}

static int t_show(struct seq_file *m, void *v)
{
        struct ftrace_event_call *call = v;

        if (strcmp(call->system, TRACE_SYSTEM) != 0)
                seq_printf(m, "%s:", call->system);
        seq_printf(m, "%s\n", call->name);

        return 0;
}

static void t_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&event_mutex);
}

static int
ftrace_event_seq_open(struct inode *inode, struct file *file)
{
        const struct seq_operations *seq_ops;

        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
                ftrace_clear_events();

        seq_ops = inode->i_private;
        return seq_open(file, seq_ops);
}

static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        char *buf;

        if (call->enabled)
                buf = "1\n";
        else
                buf = "0\n";

        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        char buf[64];
        unsigned long val;
        int ret;

        if (cnt >= sizeof(buf))
                return -EINVAL;

        if (copy_from_user(&buf, ubuf, cnt))
                return -EFAULT;

        buf[cnt] = 0;

        ret = strict_strtoul(buf, 10, &val);
        if (ret < 0)
                return ret;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        switch (val) {
        case 0:
        case 1:
                mutex_lock(&event_mutex);
                ftrace_event_enable_disable(call, val);
                mutex_unlock(&event_mutex);
                break;

        default:
                return -EINVAL;
        }

        *ppos += cnt;

        return cnt;
}

static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        const char set_to_char[4] = { '?', '0', '1', 'X' };
        const char *system = filp->private_data;
        struct ftrace_event_call *call;
        char buf[2];
        int set = 0;
        int ret;

        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
                if (!call->name || !call->regfunc)
                        continue;

                if (system && strcmp(call->system, system) != 0)
                        continue;

                /*
                 * We need to find out if all the events are set,
                 * if all events are cleared, or if we have a
                 * mixture: bit 0 is set when some event is disabled,
                 * bit 1 when some event is enabled, and the result
                 * indexes set_to_char above.
                 */
                set |= (1 << !!call->enabled);

                /*
                 * If we have a mixture, no need to look further.
                 */
                if (set == 3)
                        break;
        }
        mutex_unlock(&event_mutex);

        buf[0] = set_to_char[set];
        buf[1] = '\n';

        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);

        return ret;
}

static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                    loff_t *ppos)
{
        const char *system = filp->private_data;
        unsigned long val;
        char buf[64];
        ssize_t ret;

        if (cnt >= sizeof(buf))
                return -EINVAL;

        if (copy_from_user(&buf, ubuf, cnt))
                return -EFAULT;

        buf[cnt] = 0;

        ret = strict_strtoul(buf, 10, &val);
        if (ret < 0)
                return ret;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        if (val != 0 && val != 1)
                return -EINVAL;

        ret = __ftrace_set_clr_event(NULL, system, NULL, val);
        if (ret)
                goto out;

        ret = cnt;

out:
        *ppos += cnt;

        return ret;
}

extern char *__bad_type_size(void);

#undef FIELD
#define FIELD(type, name)                                               \
        sizeof(type) != sizeof(field.name) ? __bad_type_size() :        \
        #type, "common_" #name, offsetof(typeof(field), name),          \
                sizeof(field.name), is_signed_type(type)
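
/*
 * The conditional above never evaluates __bad_type_size() at run time:
 * it is declared but deliberately never defined, so if sizeof(type)
 * ever disagrees with the field size, the surviving reference makes
 * the kernel fail to link, turning the mismatch into a build error.
 */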

static int trace_write_header(struct trace_seq *s)
{
        struct trace_entry field;

        /* struct trace_entry */
        return trace_seq_printf(s,
                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
                        "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
                        "\n",
                        FIELD(unsigned short, type),
                        FIELD(unsigned char, flags),
                        FIELD(unsigned char, preempt_count),
                        FIELD(int, pid),
                        FIELD(int, lock_depth));
}

static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        struct trace_seq *s;
        char *buf;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        /* If any of the first writes fail, so will the show_format. */

        trace_seq_printf(s, "name: %s\n", call->name);
        trace_seq_printf(s, "ID: %d\n", call->id);
        trace_seq_printf(s, "format:\n");
        trace_write_header(s);

        r = call->show_format(call, s);
        if (!r) {
                /*
                 * ug!  The format output is bigger than a PAGE!!
                 */
                buf = "FORMAT TOO BIG\n";
                r = simple_read_from_buffer(ubuf, cnt, ppos,
                                              buf, strlen(buf));
                goto out;
        }

        r = simple_read_from_buffer(ubuf, cnt, ppos,
                                    s->buffer, s->len);
 out:
        kfree(s);
        return r;
}

static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);
        trace_seq_printf(s, "%d\n", call->id);

        r = simple_read_from_buffer(ubuf, cnt, ppos,
                                    s->buffer, s->len);
        kfree(s);
        return r;
}

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        print_event_filter(call, s);
        r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

        kfree(s);

        return r;
}

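/*
 * Writes to a per-event "filter" file land here; e.g. (names
 * illustrative) echo 'common_pid != 0' > events/<system>/<event>/filter
 * hands the expression to apply_event_filter() below.
 */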
static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        struct ftrace_event_call *call = filp->private_data;
        char *buf;
        int err;

        if (cnt >= PAGE_SIZE)
                return -EINVAL;

        buf = (char *)__get_free_page(GFP_TEMPORARY);
        if (!buf)
                return -ENOMEM;

        if (copy_from_user(buf, ubuf, cnt)) {
                free_page((unsigned long) buf);
                return -EFAULT;
        }
        buf[cnt] = '\0';

        err = apply_event_filter(call, buf);
        free_page((unsigned long) buf);
        if (err < 0)
                return err;

        *ppos += cnt;

        return cnt;
}

static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                      loff_t *ppos)
{
        struct event_subsystem *system = filp->private_data;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        print_subsystem_event_filter(system, s);
        r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

        kfree(s);

        return r;
}

static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
{
        struct event_subsystem *system = filp->private_data;
        char *buf;
        int err;

        if (cnt >= PAGE_SIZE)
                return -EINVAL;

        buf = (char *)__get_free_page(GFP_TEMPORARY);
        if (!buf)
                return -ENOMEM;

        if (copy_from_user(buf, ubuf, cnt)) {
                free_page((unsigned long) buf);
                return -EFAULT;
        }
        buf[cnt] = '\0';

        err = apply_subsystem_event_filter(system, buf);
        free_page((unsigned long) buf);
        if (err < 0)
                return err;

        *ppos += cnt;

        return cnt;
}

static ssize_t
show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
        int (*func)(struct trace_seq *s) = filp->private_data;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        func(s);
        r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

        kfree(s);

        return r;
}

static const struct seq_operations show_event_seq_ops = {
        .start = t_start,
        .next = t_next,
        .show = t_show,
        .stop = t_stop,
};

static const struct seq_operations show_set_event_seq_ops = {
        .start = s_start,
        .next = s_next,
        .show = t_show,
        .stop = t_stop,
};

static const struct file_operations ftrace_avail_fops = {
        .open = ftrace_event_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static const struct file_operations ftrace_set_event_fops = {
        .open = ftrace_event_seq_open,
        .read = seq_read,
        .write = ftrace_event_write,
        .llseek = seq_lseek,
        .release = seq_release,
};

static const struct file_operations ftrace_enable_fops = {
        .open = tracing_open_generic,
        .read = event_enable_read,
        .write = event_enable_write,
};

static const struct file_operations ftrace_event_format_fops = {
        .open = tracing_open_generic,
        .read = event_format_read,
};

static const struct file_operations ftrace_event_id_fops = {
        .open = tracing_open_generic,
        .read = event_id_read,
};

static const struct file_operations ftrace_event_filter_fops = {
        .open = tracing_open_generic,
        .read = event_filter_read,
        .write = event_filter_write,
};

static const struct file_operations ftrace_subsystem_filter_fops = {
        .open = tracing_open_generic,
        .read = subsystem_filter_read,
        .write = subsystem_filter_write,
};

static const struct file_operations ftrace_system_enable_fops = {
        .open = tracing_open_generic,
        .read = system_enable_read,
        .write = system_enable_write,
};

static const struct file_operations ftrace_show_header_fops = {
        .open = tracing_open_generic,
        .read = show_header,
};

static struct dentry *event_trace_events_dir(void)
{
        static struct dentry *d_tracer;
        static struct dentry *d_events;

        if (d_events)
                return d_events;

        d_tracer = tracing_init_dentry();
        if (!d_tracer)
                return NULL;

        d_events = debugfs_create_dir("events", d_tracer);
        if (!d_events)
                pr_warning("Could not create debugfs "
                           "'events' directory\n");

        return d_events;
}

static LIST_HEAD(event_subsystems);

static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
{
        struct event_subsystem *system;
        struct dentry *entry;

        /* First see if we did not already create this dir */
        list_for_each_entry(system, &event_subsystems, list) {
                if (strcmp(system->name, name) == 0) {
                        system->nr_events++;
                        return system->entry;
                }
        }

        /* need to create new entry */
        system = kmalloc(sizeof(*system), GFP_KERNEL);
        if (!system) {
                pr_warning("No memory to create event subsystem %s\n",
                           name);
                return d_events;
        }

        system->entry = debugfs_create_dir(name, d_events);
        if (!system->entry) {
                pr_warning("Could not create event subsystem %s\n",
                           name);
                kfree(system);
                return d_events;
        }

        system->nr_events = 1;
        system->name = kstrdup(name, GFP_KERNEL);
        if (!system->name) {
                debugfs_remove(system->entry);
                kfree(system);
                return d_events;
        }

        list_add(&system->list, &event_subsystems);

        system->filter = NULL;

        system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
        if (!system->filter) {
                pr_warning("Could not allocate filter for subsystem "
                           "'%s'\n", name);
                return system->entry;
        }

        entry = debugfs_create_file("filter", 0644, system->entry, system,
                                    &ftrace_subsystem_filter_fops);
        if (!entry) {
                kfree(system->filter);
                system->filter = NULL;
                pr_warning("Could not create debugfs "
                           "'%s/filter' entry\n", name);
        }

        trace_create_file("enable", 0644, system->entry,
                          (void *)system->name,
                          &ftrace_system_enable_fops);

        return system->entry;
}

static int
event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                 const struct file_operations *id,
                 const struct file_operations *enable,
                 const struct file_operations *filter,
                 const struct file_operations *format)
{
        int ret;

        /*
         * If the trace point header did not define TRACE_SYSTEM
         * then the system would be called "TRACE_SYSTEM".
         */
        if (strcmp(call->system, TRACE_SYSTEM) != 0)
                d_events = event_subsystem_dir(call->system, d_events);

        call->dir = debugfs_create_dir(call->name, d_events);
        if (!call->dir) {
                pr_warning("Could not create debugfs "
                           "'%s' directory\n", call->name);
                return -1;
        }

        if (call->regfunc)
                trace_create_file("enable", 0644, call->dir, call,
                                  enable);

        if (call->id && call->profile_enable)
                trace_create_file("id", 0444, call->dir, call,
                                  id);

        if (call->define_fields) {
                ret = trace_define_common_fields(call);
                if (!ret)
                        ret = call->define_fields(call);
                if (ret < 0) {
                        pr_warning("Could not initialize trace point"
                                   " events/%s\n", call->name);
                        return ret;
                }
                trace_create_file("filter", 0644, call->dir, call,
                                  filter);
        }

        /* A trace may not want to export its format */
        if (!call->show_format)
                return 0;

        trace_create_file("format", 0444, call->dir, call,
                          format);

        return 0;
}

static int __trace_add_event_call(struct ftrace_event_call *call)
{
        struct dentry *d_events;
        int ret;

        if (!call->name)
                return -EINVAL;

        if (call->raw_init) {
                ret = call->raw_init(call);
                if (ret < 0) {
                        if (ret != -ENOSYS)
                                pr_warning("Could not initialize trace "
                                "events/%s\n", call->name);
                        return ret;
                }
        }

        d_events = event_trace_events_dir();
        if (!d_events)
                return -ENOENT;

        ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
                                &ftrace_enable_fops, &ftrace_event_filter_fops,
                                &ftrace_event_format_fops);
        if (!ret)
                list_add(&call->list, &ftrace_events);

        return ret;
}

/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
{
        int ret;
        mutex_lock(&event_mutex);
        ret = __trace_add_event_call(call);
        mutex_unlock(&event_mutex);
        return ret;
}

static void remove_subsystem_dir(const char *name)
{
        struct event_subsystem *system;

        if (strcmp(name, TRACE_SYSTEM) == 0)
                return;

        list_for_each_entry(system, &event_subsystems, list) {
                if (strcmp(system->name, name) == 0) {
                        if (!--system->nr_events) {
                                struct event_filter *filter = system->filter;

                                debugfs_remove_recursive(system->entry);
                                list_del(&system->list);
                                if (filter) {
                                        kfree(filter->filter_string);
                                        kfree(filter);
                                }
                                kfree(system->name);
                                kfree(system);
                        }
                        break;
                }
        }
}

/*
 * Must be called with both event_mutex and trace_event_mutex held.
 */
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
        ftrace_event_enable_disable(call, 0);
        if (call->event)
                __unregister_ftrace_event(call->event);
        debugfs_remove_recursive(call->dir);
        list_del(&call->list);
        trace_destroy_fields(call);
        destroy_preds(call);
        remove_subsystem_dir(call->system);
}

/* Remove an event_call */
void trace_remove_event_call(struct ftrace_event_call *call)
{
        mutex_lock(&event_mutex);
        down_write(&trace_event_mutex);
        __trace_remove_event_call(call);
        up_write(&trace_event_mutex);
        mutex_unlock(&event_mutex);
}

#define for_each_event(event, start, end)                       \
        for (event = start;                                     \
             (unsigned long)event < (unsigned long)end;         \
             event++)
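
/*
 * The calls walked by for_each_event() are ftrace_event_call structures
 * laid out contiguously by the linker: either a module's
 * mod->trace_events array, or the core kernel's __start_ftrace_events..
 * __stop_ftrace_events section (see event_trace_init() below).
 */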

#ifdef CONFIG_MODULES

static LIST_HEAD(ftrace_module_file_list);

/*
 * Modules must own their file_operations to keep up with
 * reference counting.
 */
struct ftrace_module_file_ops {
        struct list_head                list;
        struct module                   *mod;
        struct file_operations          id;
        struct file_operations          enable;
        struct file_operations          format;
        struct file_operations          filter;
};

static struct ftrace_module_file_ops *
trace_create_file_ops(struct module *mod)
{
        struct ftrace_module_file_ops *file_ops;

        /*
         * This is a bit of a PITA. To allow for correct reference
         * counting, modules must "own" their file_operations.
         * To do this, we allocate the file operations that will be
         * used in the event directory.
         */

        file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
        if (!file_ops)
                return NULL;

        file_ops->mod = mod;

        file_ops->id = ftrace_event_id_fops;
        file_ops->id.owner = mod;

        file_ops->enable = ftrace_enable_fops;
        file_ops->enable.owner = mod;

        file_ops->filter = ftrace_event_filter_fops;
        file_ops->filter.owner = mod;

        file_ops->format = ftrace_event_format_fops;
        file_ops->format.owner = mod;

        list_add(&file_ops->list, &ftrace_module_file_list);

        return file_ops;
}

static void trace_module_add_events(struct module *mod)
{
        struct ftrace_module_file_ops *file_ops = NULL;
        struct ftrace_event_call *call, *start, *end;
        struct dentry *d_events;
        int ret;

        start = mod->trace_events;
        end = mod->trace_events + mod->num_trace_events;

        if (start == end)
                return;

        d_events = event_trace_events_dir();
        if (!d_events)
                return;

        for_each_event(call, start, end) {
                /* The linker may leave blanks */
                if (!call->name)
                        continue;
                if (call->raw_init) {
                        ret = call->raw_init(call);
                        if (ret < 0) {
                                if (ret != -ENOSYS)
                                        pr_warning("Could not initialize trace "
                                        "point events/%s\n", call->name);
                                continue;
                        }
                }
                /*
                 * This module has events, create file ops for this module
                 * if not already done.
                 */
                if (!file_ops) {
                        file_ops = trace_create_file_ops(mod);
                        if (!file_ops)
                                return;
                }
                call->mod = mod;
                ret = event_create_dir(call, d_events,
                                       &file_ops->id, &file_ops->enable,
                                       &file_ops->filter, &file_ops->format);
                if (!ret)
                        list_add(&call->list, &ftrace_events);
        }
}

static void trace_module_remove_events(struct module *mod)
{
        struct ftrace_module_file_ops *file_ops;
        struct ftrace_event_call *call, *p;
        bool found = false;

        down_write(&trace_event_mutex);
        list_for_each_entry_safe(call, p, &ftrace_events, list) {
                if (call->mod == mod) {
                        found = true;
                        __trace_remove_event_call(call);
                }
        }

        /* Now free the file_operations */
        list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
                if (file_ops->mod == mod)
                        break;
        }
        if (&file_ops->list != &ftrace_module_file_list) {
                list_del(&file_ops->list);
                kfree(file_ops);
        }

        /*
         * It is safest to reset the ring buffer if the module being unloaded
         * registered any events.
         */
        if (found)
                tracing_reset_current_online_cpus();
        up_write(&trace_event_mutex);
}

static int trace_module_notify(struct notifier_block *self,
                               unsigned long val, void *data)
{
        struct module *mod = data;

        mutex_lock(&event_mutex);
        switch (val) {
        case MODULE_STATE_COMING:
                trace_module_add_events(mod);
                break;
        case MODULE_STATE_GOING:
                trace_module_remove_events(mod);
                break;
        }
        mutex_unlock(&event_mutex);

        return 0;
}
#else
static int trace_module_notify(struct notifier_block *self,
                               unsigned long val, void *data)
{
        return 0;
}
#endif /* CONFIG_MODULES */

static struct notifier_block trace_module_nb = {
        .notifier_call = trace_module_notify,
        .priority = 0,
};

extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];

static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;

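/*
 * Handle the trace_event= boot parameter.  Its value is a comma
 * separated list in the same <subsystem>:<event> format that set_event
 * accepts, e.g. trace_event=sched:sched_switch (illustrative); it is
 * stashed here and applied from event_trace_init() below.
 */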
static __init int setup_trace_event(char *str)
{
        strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
        ring_buffer_expanded = 1;
        tracing_selftest_disabled = 1;

        return 1;
}
__setup("trace_event=", setup_trace_event);

static __init int event_trace_init(void)
{
        struct ftrace_event_call *call;
        struct dentry *d_tracer;
        struct dentry *entry;
        struct dentry *d_events;
        int ret;
        char *buf = bootup_event_buf;
        char *token;

        d_tracer = tracing_init_dentry();
        if (!d_tracer)
                return 0;

        entry = debugfs_create_file("available_events", 0444, d_tracer,
                                    (void *)&show_event_seq_ops,
                                    &ftrace_avail_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'available_events' entry\n");

        entry = debugfs_create_file("set_event", 0644, d_tracer,
                                    (void *)&show_set_event_seq_ops,
                                    &ftrace_set_event_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'set_event' entry\n");

        d_events = event_trace_events_dir();
        if (!d_events)
                return 0;

        /* ring buffer internal formats */
        trace_create_file("header_page", 0444, d_events,
                          ring_buffer_print_page_header,
                          &ftrace_show_header_fops);

        trace_create_file("header_event", 0444, d_events,
                          ring_buffer_print_entry_header,
                          &ftrace_show_header_fops);

        trace_create_file("enable", 0644, d_events,
                          NULL, &ftrace_system_enable_fops);

        for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
                /* The linker may leave blanks */
                if (!call->name)
                        continue;
                if (call->raw_init) {
                        ret = call->raw_init(call);
                        if (ret < 0) {
                                if (ret != -ENOSYS)
                                        pr_warning("Could not initialize trace "
                                        "point events/%s\n", call->name);
                                continue;
                        }
                }
                ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
                                       &ftrace_enable_fops,
                                       &ftrace_event_filter_fops,
                                       &ftrace_event_format_fops);
                if (!ret)
                        list_add(&call->list, &ftrace_events);
        }

        while (true) {
                token = strsep(&buf, ",");

                if (!token)
                        break;
                if (!*token)
                        continue;

                ret = ftrace_set_clr_event(token, 1);
                if (ret)
                        pr_warning("Failed to enable trace event: %s\n", token);
        }

        ret = register_module_notifier(&trace_module_nb);
        if (ret)
                pr_warning("Failed to register trace events module notifier\n");

        return 0;
}
fs_initcall(event_trace_init);

#ifdef CONFIG_FTRACE_STARTUP_TEST

static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
static DEFINE_MUTEX(test_mutex);

static __init void test_work(struct work_struct *dummy)
{
        spin_lock(&test_spinlock);
        spin_lock_irq(&test_spinlock_irq);
        udelay(1);
        spin_unlock_irq(&test_spinlock_irq);
        spin_unlock(&test_spinlock);

        mutex_lock(&test_mutex);
        msleep(1);
        mutex_unlock(&test_mutex);
}

static __init int event_test_thread(void *unused)
{
        void *test_malloc;

        test_malloc = kmalloc(1234, GFP_KERNEL);
        if (!test_malloc)
                pr_info("failed to kmalloc\n");

        schedule_on_each_cpu(test_work);

        kfree(test_malloc);

        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop())
                schedule();

        return 0;
}

/*
 * Do various things that may trigger events.
 */
static __init void event_test_stuff(void)
{
        struct task_struct *test_thread;

        test_thread = kthread_run(event_test_thread, NULL, "test-events");
        msleep(1);
        kthread_stop(test_thread);
}

/*
 * For every trace event defined, we will test each trace point separately,
 * and then by groups, and finally all trace points.
 */
static __init void event_trace_self_tests(void)
{
        struct ftrace_event_call *call;
        struct event_subsystem *system;
        int ret;

        pr_info("Running tests on trace events:\n");

        list_for_each_entry(call, &ftrace_events, list) {

                /* Only test those that have a regfunc */
                if (!call->regfunc)
                        continue;

/*
 * Testing syscall events here is pretty useless, but we still
 * do it if configured, even though it is time consuming.  What
 * we really need is a user thread to perform the syscalls as
 * we test.
 */
#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
                if (call->system &&
                    strcmp(call->system, "syscalls") == 0)
                        continue;
#endif

                pr_info("Testing event %s: ", call->name);

                /*
                 * If an event is already enabled, someone is using
                 * it and the self test should not be on.
                 */
                if (call->enabled) {
                        pr_warning("Enabled event during self test!\n");
                        WARN_ON_ONCE(1);
                        continue;
                }

                ftrace_event_enable_disable(call, 1);
                event_test_stuff();
                ftrace_event_enable_disable(call, 0);

                pr_cont("OK\n");
        }

        /* Now test at the sub system level */

        pr_info("Running tests on trace event systems:\n");

        list_for_each_entry(system, &event_subsystems, list) {

                /* the ftrace system is special, skip it */
                if (strcmp(system->name, "ftrace") == 0)
                        continue;

                pr_info("Testing event system %s: ", system->name);

                ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
                if (WARN_ON_ONCE(ret)) {
                        pr_warning("error enabling system %s\n",
                                   system->name);
                        continue;
                }

                event_test_stuff();

                ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
                if (WARN_ON_ONCE(ret))
                        pr_warning("error disabling system %s\n",
                                   system->name);

                pr_cont("OK\n");
        }

        /* Test with all events enabled */

        pr_info("Running tests on all trace events:\n");
        pr_info("Testing all events: ");

        ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
        if (WARN_ON_ONCE(ret)) {
                pr_warning("error enabling all events\n");
                return;
        }

        event_test_stuff();

        /* now disable all events again */
        ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
        if (WARN_ON_ONCE(ret)) {
                pr_warning("error disabling all events\n");
                return;
        }

        pr_cont("OK\n");
}

#ifdef CONFIG_FUNCTION_TRACER

static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);

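/*
 * Registered as the function tracer callback during the self test, so
 * this runs for every traced function.  The per-cpu counter keeps the
 * callback from recursing: reserving ring buffer space can itself hit
 * traced functions, so anything past the first level bails out early.
 */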
static void
function_test_events_call(unsigned long ip, unsigned long parent_ip)
{
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        struct ftrace_entry *entry;
        unsigned long flags;
        long disabled;
        int resched;
        int cpu;
        int pc;

        pc = preempt_count();
        resched = ftrace_preempt_disable();
        cpu = raw_smp_processor_id();
        disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));

        if (disabled != 1)
                goto out;

        local_save_flags(flags);

        event = trace_current_buffer_lock_reserve(&buffer,
                                                  TRACE_FN, sizeof(*entry),
                                                  flags, pc);
        if (!event)
                goto out;
        entry   = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;

        trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);

 out:
        atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
        ftrace_preempt_enable(resched);
}

static struct ftrace_ops trace_ops __initdata =
{
        .func = function_test_events_call,
};

static __init void event_trace_self_test_with_function(void)
{
        register_ftrace_function(&trace_ops);
        pr_info("Running tests again, along with the function tracer\n");
        event_trace_self_tests();
        unregister_ftrace_function(&trace_ops);
}
#else
static __init void event_trace_self_test_with_function(void)
{
}
#endif

static __init int event_trace_self_tests_init(void)
{
        if (!tracing_selftest_disabled) {
                event_trace_self_tests();
                event_trace_self_test_with_function();
        }

        return 0;
}

late_initcall(event_trace_self_tests_init);

#endif