SAFE public projects git trees. - safe/jmp/linux-2.6/blob - arch/x86/kernel/ftrace.c

   1 /*
   2  * Code for replacing ftrace calls with jumps.
   3  *
   4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
   5  *
   6  * Thanks goes to Ingo Molnar, for suggesting the idea.
   7  * Mathieu Desnoyers, for suggesting postponing the modifications.
   8  * Arjan van de Ven, for keeping me straight, and explaining to me
   9  * the dangers of modifying code on the run.
  10  */
  11
  12 #include <linux/spinlock.h>
  13 #include <linux/hardirq.h>
  14 #include <linux/uaccess.h>
  15 #include <linux/ftrace.h>
  16 #include <linux/percpu.h>
  17 #include <linux/sched.h>
  18 #include <linux/init.h>
  19 #include <linux/list.h>
  20
  21 #include <asm/cacheflush.h>
  22 #include <asm/ftrace.h>
  23 #include <linux/ftrace.h>
  24 #include <asm/nops.h>
  25 #include <asm/nmi.h>
  26
  27
  28 #ifdef CONFIG_DYNAMIC_FTRACE
  29
  30 int ftrace_arch_code_modify_prepare(void)
  31 {
  32         set_kernel_text_rw();
  33         return 0;
  34 }
  35
  36 int ftrace_arch_code_modify_post_process(void)
  37 {
  38         set_kernel_text_ro();
  39         return 0;
  40 }
  41
  42 union ftrace_code_union {
  43         char code[MCOUNT_INSN_SIZE];
  44         struct {
  45                 char e8;
  46                 int offset;
  47         } __attribute__((packed));
  48 };
  49
  50 static int ftrace_calc_offset(long ip, long addr)
  51 {
  52         return (int)(addr - ip);
  53 }
  54
  55 static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  56 {
  57         static union ftrace_code_union calc;
  58
  59         calc.e8         = 0xe8;
  60         calc.offset     = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
  61
  62         /*
  63          * No locking needed, this must be called via kstop_machine
  64          * which in essence is like running on a uniprocessor machine.
  65          */
  66         return calc.code;
  67 }
  68
  69 /*
  70  * Modifying code must take extra care. On an SMP machine, if
  71  * the code being modified is also being executed on another CPU
  72  * that CPU will have undefined results and possibly take a GPF.
  73  * We use kstop_machine to stop other CPUS from exectuing code.
  74  * But this does not stop NMIs from happening. We still need
  75  * to protect against that. We separate out the modification of
  76  * the code to take care of this.
  77  *
  78  * Two buffers are added: An IP buffer and a "code" buffer.
  79  *
  80  * 1) Put the instruction pointer into the IP buffer
  81  *    and the new code into the "code" buffer.
  82  * 2) Set a flag that says we are modifying code
  83  * 3) Wait for any running NMIs to finish.
  84  * 4) Write the code
  85  * 5) clear the flag.
  86  * 6) Wait for any running NMIs to finish.
  87  *
  88  * If an NMI is executed, the first thing it does is to call
  89  * "ftrace_nmi_enter". This will check if the flag is set to write
  90  * and if it is, it will write what is in the IP and "code" buffers.
  91  *
  92  * The trick is, it does not matter if everyone is writing the same
  93  * content to the code location. Also, if a CPU is executing code
  94  * it is OK to write to that code location if the contents being written
  95  * are the same as what exists.
  96  */
  97
  98 static atomic_t in_nmi = ATOMIC_INIT(0);
  99 static int mod_code_status;             /* holds return value of text write */
 100 static int mod_code_write;              /* set when NMI should do the write */
 101 static void *mod_code_ip;               /* holds the IP to write to */
 102 static void *mod_code_newcode;          /* holds the text to write to the IP */
 103
 104 static unsigned nmi_wait_count;
 105 static atomic_t nmi_update_count = ATOMIC_INIT(0);
 106
 107 int ftrace_arch_read_dyn_info(char *buf, int size)
 108 {
 109         int r;
 110
 111         r = snprintf(buf, size, "%u %u",
 112                      nmi_wait_count,
 113                      atomic_read(&nmi_update_count));
 114         return r;
 115 }
 116
 117 static void ftrace_mod_code(void)
 118 {
 119         /*
 120          * Yes, more than one CPU process can be writing to mod_code_status.
 121          *    (and the code itself)
 122          * But if one were to fail, then they all should, and if one were
 123          * to succeed, then they all should.
 124          */
 125         mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 126                                              MCOUNT_INSN_SIZE);
 127 }
 128
 129 void ftrace_nmi_enter(void)
 130 {
 131         atomic_inc(&in_nmi);
 132         /* Must have in_nmi seen before reading write flag */
 133         smp_mb();
 134         if (mod_code_write) {
 135                 ftrace_mod_code();
 136                 atomic_inc(&nmi_update_count);
 137         }
 138 }
 139
 140 void ftrace_nmi_exit(void)
 141 {
 142         /* Finish all executions before clearing in_nmi */
 143         smp_wmb();
 144         atomic_dec(&in_nmi);
 145 }
 146
 147 static void wait_for_nmi(void)
 148 {
 149         int waited = 0;
 150
 151         while (atomic_read(&in_nmi)) {
 152                 waited = 1;
 153                 cpu_relax();
 154         }
 155
 156         if (waited)
 157                 nmi_wait_count++;
 158 }
 159
/*
 * Write @new_code (MCOUNT_INSN_SIZE bytes) over the text at @ip while
 * cooperating with NMIs: the target and the new bytes are published in
 * mod_code_ip / mod_code_newcode, mod_code_write is raised so that
 * ftrace_nmi_enter() performs the same write, and the flag is lowered
 * again once the write is done. The statement order and the barriers
 * between the steps are deliberate; do not reorder them.
 *
 * @ip:       address of the text to overwrite
 * @new_code: bytes to write; the pointer is stored in mod_code_newcode
 *            and read by ftrace_mod_code() while the flag is set
 *
 * Returns mod_code_status, i.e. the result recorded by the last
 * ftrace_mod_code() run (this CPU's or an NMI's).
 */
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
        /* Step 1: fill the IP and "code" buffers */
        mod_code_ip = (void *)ip;
        mod_code_newcode = new_code;

        /* The buffers need to be visible before we let NMIs write them */
        smp_wmb();

        /* Step 2: from now on NMIs entering will do the write themselves */
        mod_code_write = 1;

        /* Make sure write bit is visible before we wait on NMIs */
        smp_mb();

        /* Step 3: let NMIs that may have missed the flag drain */
        wait_for_nmi();

        /* Make sure all running NMIs have finished before we write the code */
        smp_mb();

        /* Step 4: do the write from this CPU */
        ftrace_mod_code();

        /* Make sure the write happens before clearing the bit */
        smp_wmb();

        /* Step 5: stop NMIs from writing */
        mod_code_write = 0;

        /* Make sure NMIs see the cleared bit */
        smp_mb();

        /* Step 6: wait for NMIs that may still be using the buffers */
        wait_for_nmi();

        return mod_code_status;
}
 193
 194
 195
 196
 197 static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
 198
 199 static unsigned char *ftrace_nop_replace(void)
 200 {
 201         return ftrace_nop;
 202 }
 203
 204 static int
 205 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 206                    unsigned char *new_code)
 207 {
 208         unsigned char replaced[MCOUNT_INSN_SIZE];
 209
 210         /*
 211          * Note: Due to modules and __init, code can
 212          *  disappear and change, we need to protect against faulting
 213          *  as well as code changing. We do this by using the
 214          *  probe_kernel_* functions.
 215          *
 216          * No real locking needed, this code is run through
 217          * kstop_machine, or before SMP starts.
 218          */
 219
 220         /* read the text we want to modify */
 221         if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
 222                 return -EFAULT;
 223
 224         /* Make sure it is what we expect it to be */
 225         if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 226                 return -EINVAL;
 227
 228         /* replace the text with the new text */
 229         if (do_ftrace_mod_code(ip, new_code))
 230                 return -EPERM;
 231
 232         sync_core();
 233
 234         return 0;
 235 }
 236
 237 int ftrace_make_nop(struct module *mod,
 238                     struct dyn_ftrace *rec, unsigned long addr)
 239 {
 240         unsigned char *new, *old;
 241         unsigned long ip = rec->ip;
 242
 243         old = ftrace_call_replace(ip, addr);
 244         new = ftrace_nop_replace();
 245
 246         return ftrace_modify_code(rec->ip, old, new);
 247 }
 248
 249 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 250 {
 251         unsigned char *new, *old;
 252         unsigned long ip = rec->ip;
 253
 254         old = ftrace_nop_replace();
 255         new = ftrace_call_replace(ip, addr);
 256
 257         return ftrace_modify_code(rec->ip, old, new);
 258 }
 259
 260 int ftrace_update_ftrace_func(ftrace_func_t func)
 261 {
 262         unsigned long ip = (unsigned long)(&ftrace_call);
 263         unsigned char old[MCOUNT_INSN_SIZE], *new;
 264         int ret;
 265
 266         memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 267         new = ftrace_call_replace(ip, (unsigned long)func);
 268         ret = ftrace_modify_code(ip, old, new);
 269
 270         return ret;
 271 }
 272
/*
 * Pick the no-op sequence stored in ftrace_nop by executing candidate
 * no-ops and seeing which ones fault.
 *
 * @data: treated as a pointer to unsigned long; 0 is stored through it
 *
 * Returns 0.
 */
int __init ftrace_dyn_arch_init(void *data)
{
        /* Labels defined inside the asm statement below */
        extern const unsigned char ftrace_test_p6nop[];
        extern const unsigned char ftrace_test_nop5[];
        extern const unsigned char ftrace_test_jmp[];
        int faulted = 0;

        /*
         * There is no good nop for all x86 archs.
         * We will default to using the P6_NOP5, but first we
         * will test to make sure that the nop will actually
         * work on this CPU. If it faults, we will then
         * go to a lesser efficient 5 byte nop. If that fails
         * we then just use a jmp as our nop. This isn't the most
         * efficient nop, but we can not use a multi part nop
         * since we would then risk being preempted in the middle
         * of that nop, and if we enabled tracing then, it might
         * cause a system crash.
         *
         * TODO: check the cpuid to determine the best nop.
         *
         * Fixup flow: a fault at ftrace_test_p6nop lands on "2:", which
         * sets faulted to 1 and retries with ftrace_test_nop5; a fault
         * there lands on "3:", which sets faulted to 2 and resumes at
         * "1:".
         */
        asm volatile (
                "ftrace_test_jmp:"
                "jmp ftrace_test_p6nop\n"
                "nop\n"
                "nop\n"
                "nop\n"  /* 2 byte jmp + 3 bytes */
                "ftrace_test_p6nop:"
                P6_NOP5
                "jmp 1f\n"
                "ftrace_test_nop5:"
                ".byte 0x66,0x66,0x66,0x66,0x90\n"
                "1:"
                ".section .fixup, \"ax\"\n"
                "2:     movl $1, %0\n"
                "       jmp ftrace_test_nop5\n"
                "3:     movl $2, %0\n"
                "       jmp 1b\n"
                ".previous\n"
                _ASM_EXTABLE(ftrace_test_p6nop, 2b)
                _ASM_EXTABLE(ftrace_test_nop5, 3b)
                : "=r"(faulted) : "0" (faulted));

        /* Copy the bytes of the first candidate that did not fault */
        switch (faulted) {
        case 0:
                pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
                memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
                break;
        case 1:
                pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
                memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
                break;
        case 2:
                pr_info("ftrace: converting mcount calls to jmp . + 5\n");
                memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
                break;
        }

        /* The return code is returned via data */
        *(unsigned long *)data = 0;

        return 0;
}
 336 #endif
 337
 338 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 339
 340 #ifdef CONFIG_DYNAMIC_FTRACE
 341 extern void ftrace_graph_call(void);
 342
 343 static int ftrace_mod_jmp(unsigned long ip,
 344                           int old_offset, int new_offset)
 345 {
 346         unsigned char code[MCOUNT_INSN_SIZE];
 347
 348         if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
 349                 return -EFAULT;
 350
 351         if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
 352                 return -EINVAL;
 353
 354         *(int *)(&code[1]) = new_offset;
 355
 356         if (do_ftrace_mod_code(ip, &code))
 357                 return -EPERM;
 358
 359         return 0;
 360 }
 361
 362 int ftrace_enable_ftrace_graph_caller(void)
 363 {
 364         unsigned long ip = (unsigned long)(&ftrace_graph_call);
 365         int old_offset, new_offset;
 366
 367         old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
 368         new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
 369
 370         return ftrace_mod_jmp(ip, old_offset, new_offset);
 371 }
 372
 373 int ftrace_disable_ftrace_graph_caller(void)
 374 {
 375         unsigned long ip = (unsigned long)(&ftrace_graph_call);
 376         int old_offset, new_offset;
 377
 378         old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
 379         new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
 380
 381         return ftrace_mod_jmp(ip, old_offset, new_offset);
 382 }
 383
 384 #else /* CONFIG_DYNAMIC_FTRACE */
 385
 386 /*
 387  * These functions are picked from those used on
 388  * this page for dynamic ftrace. They have been
 389  * simplified to ignore all traces in NMI context.
 390  */
 391 static atomic_t in_nmi;
 392
 393 void ftrace_nmi_enter(void)
 394 {
 395         atomic_inc(&in_nmi);
 396 }
 397
 398 void ftrace_nmi_exit(void)
 399 {
 400         atomic_dec(&in_nmi);
 401 }
 402
 403 #endif /* !CONFIG_DYNAMIC_FTRACE */
 404
 405 /* Add a function return address to the trace stack on thread info.*/
 406 static int push_return_trace(unsigned long ret, unsigned long long time,
 407                                 unsigned long func, int *depth)
 408 {
 409         int index;
 410
 411         if (!current->ret_stack)
 412                 return -EBUSY;
 413
 414         /* The return trace stack is full */
 415         if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 416                 atomic_inc(&current->trace_overrun);
 417                 return -EBUSY;
 418         }
 419
 420         index = ++current->curr_ret_stack;
 421         barrier();
 422         current->ret_stack[index].ret = ret;
 423         current->ret_stack[index].func = func;
 424         current->ret_stack[index].calltime = time;
 425         *depth = index;
 426
 427         return 0;
 428 }
 429
 430 /* Retrieve a function return address to the trace stack on thread info.*/
 431 static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 432 {
 433         int index;
 434
 435         index = current->curr_ret_stack;
 436
 437         if (unlikely(index < 0)) {
 438                 ftrace_graph_stop();
 439                 WARN_ON(1);
 440                 /* Might as well panic, otherwise we have no where to go */
 441                 *ret = (unsigned long)panic;
 442                 return;
 443         }
 444
 445         *ret = current->ret_stack[index].ret;
 446         trace->func = current->ret_stack[index].func;
 447         trace->calltime = current->ret_stack[index].calltime;
 448         trace->overrun = atomic_read(&current->trace_overrun);
 449         trace->depth = index;
 450         barrier();
 451         current->curr_ret_stack--;
 452
 453 }
 454
 455 /*
 456  * Send the trace to the ring-buffer.
 457  * @return the original return address.
 458  */
 459 unsigned long ftrace_return_to_handler(void)
 460 {
 461         struct ftrace_graph_ret trace;
 462         unsigned long ret;
 463
 464         pop_return_trace(&trace, &ret);
 465         trace.rettime = cpu_clock(raw_smp_processor_id());
 466         ftrace_graph_return(&trace);
 467
 468         if (unlikely(!ret)) {
 469                 ftrace_graph_stop();
 470                 WARN_ON(1);
 471                 /* Might as well panic. What else to do? */
 472                 ret = (unsigned long)panic;
 473         }
 474
 475         return ret;
 476 }
 477
 478 /*
 479  * Hook the return address and push it in the stack of return addrs
 480  * in current thread info.
 481  */
 482 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 483 {
 484         unsigned long old;
 485         unsigned long long calltime;
 486         int faulted;
 487         struct ftrace_graph_ent trace;
 488         unsigned long return_hooker = (unsigned long)
 489                                 &return_to_handler;
 490
 491         /* Nmi's are currently unsupported */
 492         if (unlikely(atomic_read(&in_nmi)))
 493                 return;
 494
 495         if (unlikely(atomic_read(&current->tracing_graph_pause)))
 496                 return;
 497
 498         /*
 499          * Protect against fault, even if it shouldn't
 500          * happen. This tool is too much intrusive to
 501          * ignore such a protection.
 502          */
 503         asm volatile(
 504                 "1: " _ASM_MOV " (%[parent]), %[old]\n"
 505                 "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
 506                 "   movl $0, %[faulted]\n"
 507                 "3:\n"
 508
 509                 ".section .fixup, \"ax\"\n"
 510                 "4: movl $1, %[faulted]\n"
 511                 "   jmp 3b\n"
 512                 ".previous\n"
 513
 514                 _ASM_EXTABLE(1b, 4b)
 515                 _ASM_EXTABLE(2b, 4b)
 516
 517                 : [old] "=r" (old), [faulted] "=r" (faulted)
 518                 : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 519                 : "memory"
 520         );
 521
 522         if (unlikely(faulted)) {
 523                 ftrace_graph_stop();
 524                 WARN_ON(1);
 525                 return;
 526         }
 527
 528         if (unlikely(!__kernel_text_address(old))) {
 529                 ftrace_graph_stop();
 530                 *parent = old;
 531                 WARN_ON(1);
 532                 return;
 533         }
 534
 535         calltime = cpu_clock(raw_smp_processor_id());
 536
 537         if (push_return_trace(old, calltime,
 538                                 self_addr, &trace.depth) == -EBUSY) {
 539                 *parent = old;
 540                 return;
 541         }
 542
 543         trace.func = self_addr;
 544
 545         /* Only trace if the calling function expects to */
 546         if (!ftrace_graph_entry(&trace)) {
 547                 current->curr_ret_stack--;
 548                 *parent = old;
 549         }
 550 }
 551 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */