genirq: Move two IRQ functions from .init.text to .text
[safe/jmp/linux-2.6] / kernel / kprobes.c
1 /*
2  *  Kernel Probes (KProbes)
3  *  kernel/kprobes.c
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18  *
19  * Copyright (C) IBM Corporation, 2002, 2004
20  *
21  * 2002-Oct     Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22  *              Probes initial implementation (includes suggestions from
23  *              Rusty Russell).
24  * 2004-Aug     Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
25  *              hlists and exceptions notifier as suggested by Andi Kleen.
26  * 2004-July    Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
27  *              interface to access function arguments.
28  * 2004-Sep     Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
29  *              exceptions notifier to be first on the priority list.
30  * 2005-May     Hien Nguyen <hien@us.ibm.com>, Jim Keniston
31  *              <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
32  *              <prasanna@in.ibm.com> added function-return probes.
33  */
34 #include <linux/kprobes.h>
35 #include <linux/hash.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/stddef.h>
39 #include <linux/module.h>
40 #include <linux/moduleloader.h>
41 #include <linux/kallsyms.h>
42 #include <linux/freezer.h>
43 #include <linux/seq_file.h>
44 #include <linux/debugfs.h>
45 #include <linux/sysctl.h>
46 #include <linux/kdebug.h>
47 #include <linux/memory.h>
48 #include <linux/ftrace.h>
49 #include <linux/cpu.h>
50
51 #include <asm-generic/sections.h>
52 #include <asm/cacheflush.h>
53 #include <asm/errno.h>
54 #include <asm/uaccess.h>
55
56 #define KPROBE_HASH_BITS 6
57 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
58
59
/*
 * Default symbol lookup: resolve @name through kallsyms and store the
 * resulting address in @addr.  Some oddball architectures like 64bit
 * powerpc have function descriptors, so an architecture may supply its
 * own definition; this one is only used when none was provided.
 */
#ifndef kprobe_lookup_name
#define kprobe_lookup_name(name, addr) \
        addr = (kprobe_opcode_t *)kallsyms_lookup_name(name)
#endif
68
69 static int kprobes_initialized;
70 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
71 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
72
73 /* NOTE: change this value only with kprobe_mutex held */
74 static bool kprobes_all_disarmed;
75
76 static DEFINE_MUTEX(kprobe_mutex);      /* Protects kprobe_table */
77 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
78 static struct {
79         spinlock_t lock ____cacheline_aligned_in_smp;
80 } kretprobe_table_locks[KPROBE_TABLE_SIZE];
81
82 static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
83 {
84         return &(kretprobe_table_locks[hash].lock);
85 }
86
87 /*
88  * Normally, functions that we'd want to prohibit kprobes in, are marked
89  * __kprobes. But, there are cases where such functions already belong to
90  * a different section (__sched for preempt_schedule)
91  *
92  * For such cases, we now have a blacklist
93  */
94 static struct kprobe_blackpoint kprobe_blacklist[] = {
95         {"preempt_schedule",},
96         {"native_get_debugreg",},
97         {"irq_entries_start",},
98         {"common_interrupt",},
99         {"mcount",},    /* mcount can be called from everywhere */
100         {NULL}    /* Terminator */
101 };
102
103 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
104 /*
105  * kprobe->ainsn.insn points to the copy of the instruction to be
106  * single-stepped. x86_64, POWER4 and above have no-exec support and
107  * stepping on the instruction on a vmalloced/kmalloced/data page
108  * is a recipe for disaster
109  */
110 struct kprobe_insn_page {
111         struct list_head list;
112         kprobe_opcode_t *insns;         /* Page of instruction slots */
113         int nused;
114         int ngarbage;
115         char slot_used[];
116 };
117
/* Bytes needed for a kprobe_insn_page whose slot_used[] has @slots entries */
#define KPROBE_INSN_PAGE_SIZE(slots)                            \
        (offsetof(struct kprobe_insn_page, slot_used) +         \
         (sizeof(char) * (slots)))
121
122 struct kprobe_insn_cache {
123         struct list_head pages; /* list of kprobe_insn_page */
124         size_t insn_size;       /* size of instruction slot */
125         int nr_garbage;
126 };
127
128 static int slots_per_page(struct kprobe_insn_cache *c)
129 {
130         return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
131 }
132
/* Per-slot state stored in kprobe_insn_page.slot_used[] */
enum kprobe_slot_state {
        SLOT_CLEAN = 0,         /* Free; may be handed out */
        SLOT_DIRTY = 1,         /* Freed as "dirty"; waits for garbage collection */
        SLOT_USED = 2,          /* Handed out by __get_insn_slot() */
};
138
139 static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */
140 static struct kprobe_insn_cache kprobe_insn_slots = {
141         .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
142         .insn_size = MAX_INSN_SIZE,
143         .nr_garbage = 0,
144 };
145 static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
146
147 /**
148  * __get_insn_slot() - Find a slot on an executable page for an instruction.
149  * We allocate an executable page if there's no room on existing ones.
150  */
151 static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
152 {
153         struct kprobe_insn_page *kip;
154
155  retry:
156         list_for_each_entry(kip, &c->pages, list) {
157                 if (kip->nused < slots_per_page(c)) {
158                         int i;
159                         for (i = 0; i < slots_per_page(c); i++) {
160                                 if (kip->slot_used[i] == SLOT_CLEAN) {
161                                         kip->slot_used[i] = SLOT_USED;
162                                         kip->nused++;
163                                         return kip->insns + (i * c->insn_size);
164                                 }
165                         }
166                         /* kip->nused is broken. Fix it. */
167                         kip->nused = slots_per_page(c);
168                         WARN_ON(1);
169                 }
170         }
171
172         /* If there are any garbage slots, collect it and try again. */
173         if (c->nr_garbage && collect_garbage_slots(c) == 0)
174                 goto retry;
175
176         /* All out of space.  Need to allocate a new page. */
177         kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
178         if (!kip)
179                 return NULL;
180
181         /*
182          * Use module_alloc so this page is within +/- 2GB of where the
183          * kernel image and loaded module images reside. This is required
184          * so x86_64 can correctly handle the %rip-relative fixups.
185          */
186         kip->insns = module_alloc(PAGE_SIZE);
187         if (!kip->insns) {
188                 kfree(kip);
189                 return NULL;
190         }
191         INIT_LIST_HEAD(&kip->list);
192         memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
193         kip->slot_used[0] = SLOT_USED;
194         kip->nused = 1;
195         kip->ngarbage = 0;
196         list_add(&kip->list, &c->pages);
197         return kip->insns;
198 }
199
200
201 kprobe_opcode_t __kprobes *get_insn_slot(void)
202 {
203         kprobe_opcode_t *ret = NULL;
204
205         mutex_lock(&kprobe_insn_mutex);
206         ret = __get_insn_slot(&kprobe_insn_slots);
207         mutex_unlock(&kprobe_insn_mutex);
208
209         return ret;
210 }
211
212 /* Return 1 if all garbages are collected, otherwise 0. */
213 static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
214 {
215         kip->slot_used[idx] = SLOT_CLEAN;
216         kip->nused--;
217         if (kip->nused == 0) {
218                 /*
219                  * Page is no longer in use.  Free it unless
220                  * it's the last one.  We keep the last one
221                  * so as not to have to set it up again the
222                  * next time somebody inserts a probe.
223                  */
224                 if (!list_is_singular(&kip->list)) {
225                         list_del(&kip->list);
226                         module_free(NULL, kip->insns);
227                         kfree(kip);
228                 }
229                 return 1;
230         }
231         return 0;
232 }
233
234 static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
235 {
236         struct kprobe_insn_page *kip, *next;
237
238         /* Ensure no-one is interrupted on the garbages */
239         synchronize_sched();
240
241         list_for_each_entry_safe(kip, next, &c->pages, list) {
242                 int i;
243                 if (kip->ngarbage == 0)
244                         continue;
245                 kip->ngarbage = 0;      /* we will collect all garbages */
246                 for (i = 0; i < slots_per_page(c); i++) {
247                         if (kip->slot_used[i] == SLOT_DIRTY &&
248                             collect_one_slot(kip, i))
249                                 break;
250                 }
251         }
252         c->nr_garbage = 0;
253         return 0;
254 }
255
256 static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
257                                        kprobe_opcode_t *slot, int dirty)
258 {
259         struct kprobe_insn_page *kip;
260
261         list_for_each_entry(kip, &c->pages, list) {
262                 long idx = ((long)slot - (long)kip->insns) / c->insn_size;
263                 if (idx >= 0 && idx < slots_per_page(c)) {
264                         WARN_ON(kip->slot_used[idx] != SLOT_USED);
265                         if (dirty) {
266                                 kip->slot_used[idx] = SLOT_DIRTY;
267                                 kip->ngarbage++;
268                                 if (++c->nr_garbage > slots_per_page(c))
269                                         collect_garbage_slots(c);
270                         } else
271                                 collect_one_slot(kip, idx);
272                         return;
273                 }
274         }
275         /* Could not free this slot. */
276         WARN_ON(1);
277 }
278
279 void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
280 {
281         mutex_lock(&kprobe_insn_mutex);
282         __free_insn_slot(&kprobe_insn_slots, slot, dirty);
283         mutex_unlock(&kprobe_insn_mutex);
284 }
285 #ifdef CONFIG_OPTPROBES
286 /* For optimized_kprobe buffer */
287 static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
288 static struct kprobe_insn_cache kprobe_optinsn_slots = {
289         .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
290         /* .insn_size is initialized later */
291         .nr_garbage = 0,
292 };
293 /* Get a slot for optimized_kprobe buffer */
294 kprobe_opcode_t __kprobes *get_optinsn_slot(void)
295 {
296         kprobe_opcode_t *ret = NULL;
297
298         mutex_lock(&kprobe_optinsn_mutex);
299         ret = __get_insn_slot(&kprobe_optinsn_slots);
300         mutex_unlock(&kprobe_optinsn_mutex);
301
302         return ret;
303 }
304
305 void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
306 {
307         mutex_lock(&kprobe_optinsn_mutex);
308         __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
309         mutex_unlock(&kprobe_optinsn_mutex);
310 }
311 #endif
312 #endif
313
314 /* We have preemption disabled.. so it is safe to use __ versions */
315 static inline void set_kprobe_instance(struct kprobe *kp)
316 {
317         __get_cpu_var(kprobe_instance) = kp;
318 }
319
/* Clear this CPU's kprobe_instance: no probe handler is running any more. */
static inline void reset_kprobe_instance(void)
{
        set_kprobe_instance(NULL);
}
324
325 /*
326  * This routine is called either:
327  *      - under the kprobe_mutex - during kprobe_[un]register()
328  *                              OR
329  *      - with preemption disabled - from arch/xxx/kernel/kprobes.c
330  */
331 struct kprobe __kprobes *get_kprobe(void *addr)
332 {
333         struct hlist_head *head;
334         struct hlist_node *node;
335         struct kprobe *p;
336
337         head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
338         hlist_for_each_entry_rcu(p, node, head, hlist) {
339                 if (p->addr == addr)
340                         return p;
341         }
342
343         return NULL;
344 }
345
346 static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
347
348 /* Return true if the kprobe is an aggregator */
349 static inline int kprobe_aggrprobe(struct kprobe *p)
350 {
351         return p->pre_handler == aggr_pre_handler;
352 }
353
354 /*
355  * Keep all fields in the kprobe consistent
356  */
357 static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
358 {
359         memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
360         memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
361 }
362
363 #ifdef CONFIG_OPTPROBES
/*
 * Global switch for jump optimization.
 * NOTE: change this value only with kprobe_mutex held
 */
static bool kprobes_allow_optimization;
366
367 /*
368  * Call all pre_handler on the list, but ignores its return value.
369  * This must be called from arch-dep optimized caller.
370  */
371 void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
372 {
373         struct kprobe *kp;
374
375         list_for_each_entry_rcu(kp, &p->list, list) {
376                 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
377                         set_kprobe_instance(kp);
378                         kp->pre_handler(kp, regs);
379                 }
380                 reset_kprobe_instance();
381         }
382 }
383
384 /* Return true(!0) if the kprobe is ready for optimization. */
385 static inline int kprobe_optready(struct kprobe *p)
386 {
387         struct optimized_kprobe *op;
388
389         if (kprobe_aggrprobe(p)) {
390                 op = container_of(p, struct optimized_kprobe, kp);
391                 return arch_prepared_optinsn(&op->optinsn);
392         }
393
394         return 0;
395 }
396
397 /*
398  * Return an optimized kprobe whose optimizing code replaces
399  * instructions including addr (exclude breakpoint).
400  */
401 struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
402 {
403         int i;
404         struct kprobe *p = NULL;
405         struct optimized_kprobe *op;
406
407         /* Don't check i == 0, since that is a breakpoint case. */
408         for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
409                 p = get_kprobe((void *)(addr - i));
410
411         if (p && kprobe_optready(p)) {
412                 op = container_of(p, struct optimized_kprobe, kp);
413                 if (arch_within_optimized_kprobe(op, addr))
414                         return p;
415         }
416
417         return NULL;
418 }
419
/* Optimization staging list, protected by kprobe_mutex */
static LIST_HEAD(optimizing_list);

/* Delay passed to schedule_delayed_work() when queueing the optimizer */
#define OPTIMIZE_DELAY 5

/* Deferred work that runs kprobe_optimizer() over optimizing_list */
static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
426
427 /* Kprobe jump optimizer */
428 static __kprobes void kprobe_optimizer(struct work_struct *work)
429 {
430         struct optimized_kprobe *op, *tmp;
431
432         /* Lock modules while optimizing kprobes */
433         mutex_lock(&module_mutex);
434         mutex_lock(&kprobe_mutex);
435         if (kprobes_all_disarmed || !kprobes_allow_optimization)
436                 goto end;
437
438         /*
439          * Wait for quiesence period to ensure all running interrupts
440          * are done. Because optprobe may modify multiple instructions
441          * there is a chance that Nth instruction is interrupted. In that
442          * case, running interrupt can return to 2nd-Nth byte of jump
443          * instruction. This wait is for avoiding it.
444          */
445         synchronize_sched();
446
447         /*
448          * The optimization/unoptimization refers online_cpus via
449          * stop_machine() and cpu-hotplug modifies online_cpus.
450          * And same time, text_mutex will be held in cpu-hotplug and here.
451          * This combination can cause a deadlock (cpu-hotplug try to lock
452          * text_mutex but stop_machine can not be done because online_cpus
453          * has been changed)
454          * To avoid this deadlock, we need to call get_online_cpus()
455          * for preventing cpu-hotplug outside of text_mutex locking.
456          */
457         get_online_cpus();
458         mutex_lock(&text_mutex);
459         list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
460                 WARN_ON(kprobe_disabled(&op->kp));
461                 if (arch_optimize_kprobe(op) < 0)
462                         op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
463                 list_del_init(&op->list);
464         }
465         mutex_unlock(&text_mutex);
466         put_online_cpus();
467 end:
468         mutex_unlock(&kprobe_mutex);
469         mutex_unlock(&module_mutex);
470 }
471
472 /* Optimize kprobe if p is ready to be optimized */
473 static __kprobes void optimize_kprobe(struct kprobe *p)
474 {
475         struct optimized_kprobe *op;
476
477         /* Check if the kprobe is disabled or not ready for optimization. */
478         if (!kprobe_optready(p) || !kprobes_allow_optimization ||
479             (kprobe_disabled(p) || kprobes_all_disarmed))
480                 return;
481
482         /* Both of break_handler and post_handler are not supported. */
483         if (p->break_handler || p->post_handler)
484                 return;
485
486         op = container_of(p, struct optimized_kprobe, kp);
487
488         /* Check there is no other kprobes at the optimized instructions */
489         if (arch_check_optimized_kprobe(op) < 0)
490                 return;
491
492         /* Check if it is already optimized. */
493         if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
494                 return;
495
496         op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
497         list_add(&op->list, &optimizing_list);
498         if (!delayed_work_pending(&optimizing_work))
499                 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
500 }
501
502 /* Unoptimize a kprobe if p is optimized */
503 static __kprobes void unoptimize_kprobe(struct kprobe *p)
504 {
505         struct optimized_kprobe *op;
506
507         if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
508                 op = container_of(p, struct optimized_kprobe, kp);
509                 if (!list_empty(&op->list))
510                         /* Dequeue from the optimization queue */
511                         list_del_init(&op->list);
512                 else
513                         /* Replace jump with break */
514                         arch_unoptimize_kprobe(op);
515                 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
516         }
517 }
518
519 /* Remove optimized instructions */
520 static void __kprobes kill_optimized_kprobe(struct kprobe *p)
521 {
522         struct optimized_kprobe *op;
523
524         op = container_of(p, struct optimized_kprobe, kp);
525         if (!list_empty(&op->list)) {
526                 /* Dequeue from the optimization queue */
527                 list_del_init(&op->list);
528                 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
529         }
530         /* Don't unoptimize, because the target code will be freed. */
531         arch_remove_optimized_kprobe(op);
532 }
533
534 /* Try to prepare optimized instructions */
535 static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
536 {
537         struct optimized_kprobe *op;
538
539         op = container_of(p, struct optimized_kprobe, kp);
540         arch_prepare_optimized_kprobe(op);
541 }
542
543 /* Free optimized instructions and optimized_kprobe */
544 static __kprobes void free_aggr_kprobe(struct kprobe *p)
545 {
546         struct optimized_kprobe *op;
547
548         op = container_of(p, struct optimized_kprobe, kp);
549         arch_remove_optimized_kprobe(op);
550         kfree(op);
551 }
552
553 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
554 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
555 {
556         struct optimized_kprobe *op;
557
558         op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
559         if (!op)
560                 return NULL;
561
562         INIT_LIST_HEAD(&op->list);
563         op->kp.addr = p->addr;
564         arch_prepare_optimized_kprobe(op);
565
566         return &op->kp;
567 }
568
569 static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
570
571 /*
572  * Prepare an optimized_kprobe and optimize it
573  * NOTE: p must be a normal registered kprobe
574  */
575 static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
576 {
577         struct kprobe *ap;
578         struct optimized_kprobe *op;
579
580         ap = alloc_aggr_kprobe(p);
581         if (!ap)
582                 return;
583
584         op = container_of(ap, struct optimized_kprobe, kp);
585         if (!arch_prepared_optinsn(&op->optinsn)) {
586                 /* If failed to setup optimizing, fallback to kprobe */
587                 free_aggr_kprobe(ap);
588                 return;
589         }
590
591         init_aggr_kprobe(ap, p);
592         optimize_kprobe(ap);
593 }
594
595 #ifdef CONFIG_SYSCTL
596 static void __kprobes optimize_all_kprobes(void)
597 {
598         struct hlist_head *head;
599         struct hlist_node *node;
600         struct kprobe *p;
601         unsigned int i;
602
603         /* If optimization is already allowed, just return */
604         if (kprobes_allow_optimization)
605                 return;
606
607         kprobes_allow_optimization = true;
608         mutex_lock(&text_mutex);
609         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
610                 head = &kprobe_table[i];
611                 hlist_for_each_entry_rcu(p, node, head, hlist)
612                         if (!kprobe_disabled(p))
613                                 optimize_kprobe(p);
614         }
615         mutex_unlock(&text_mutex);
616         printk(KERN_INFO "Kprobes globally optimized\n");
617 }
618
619 static void __kprobes unoptimize_all_kprobes(void)
620 {
621         struct hlist_head *head;
622         struct hlist_node *node;
623         struct kprobe *p;
624         unsigned int i;
625
626         /* If optimization is already prohibited, just return */
627         if (!kprobes_allow_optimization)
628                 return;
629
630         kprobes_allow_optimization = false;
631         printk(KERN_INFO "Kprobes globally unoptimized\n");
632         get_online_cpus();      /* For avoiding text_mutex deadlock */
633         mutex_lock(&text_mutex);
634         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
635                 head = &kprobe_table[i];
636                 hlist_for_each_entry_rcu(p, node, head, hlist) {
637                         if (!kprobe_disabled(p))
638                                 unoptimize_kprobe(p);
639                 }
640         }
641
642         mutex_unlock(&text_mutex);
643         put_online_cpus();
644         /* Allow all currently running kprobes to complete */
645         synchronize_sched();
646 }
647
648 int sysctl_kprobes_optimization;
649 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
650                                       void __user *buffer, size_t *length,
651                                       loff_t *ppos)
652 {
653         int ret;
654
655         mutex_lock(&kprobe_mutex);
656         sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
657         ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
658
659         if (sysctl_kprobes_optimization)
660                 optimize_all_kprobes();
661         else
662                 unoptimize_all_kprobes();
663         mutex_unlock(&kprobe_mutex);
664
665         return ret;
666 }
667 #endif /* CONFIG_SYSCTL */
668
669 static void __kprobes __arm_kprobe(struct kprobe *p)
670 {
671         struct kprobe *old_p;
672
673         /* Check collision with other optimized kprobes */
674         old_p = get_optimized_kprobe((unsigned long)p->addr);
675         if (unlikely(old_p))
676                 unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
677
678         arch_arm_kprobe(p);
679         optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
680 }
681
682 static void __kprobes __disarm_kprobe(struct kprobe *p)
683 {
684         struct kprobe *old_p;
685
686         unoptimize_kprobe(p);   /* Try to unoptimize */
687         arch_disarm_kprobe(p);
688
689         /* If another kprobe was blocked, optimize it. */
690         old_p = get_optimized_kprobe((unsigned long)p->addr);
691         if (unlikely(old_p))
692                 optimize_kprobe(old_p);
693 }
694
695 #else /* !CONFIG_OPTPROBES */
696
/*
 * Without CONFIG_OPTPROBES the optimization hooks expand to nothing, and
 * arming/disarming maps straight onto the arch primitives.
 */
#define optimize_kprobe(p)                      do { } while (0)
#define unoptimize_kprobe(p)                    do { } while (0)
#define kill_optimized_kprobe(p)                do { } while (0)
#define prepare_optimized_kprobe(p)             do { } while (0)
#define try_to_optimize_kprobe(p)               do { } while (0)
#define __arm_kprobe(p)                         arch_arm_kprobe(p)
#define __disarm_kprobe(p)                      arch_disarm_kprobe(p)
704
705 static __kprobes void free_aggr_kprobe(struct kprobe *p)
706 {
707         kfree(p);
708 }
709
710 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
711 {
712         return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
713 }
714 #endif /* CONFIG_OPTPROBES */
715
716 /* Arm a kprobe with text_mutex */
717 static void __kprobes arm_kprobe(struct kprobe *kp)
718 {
719         /*
720          * Here, since __arm_kprobe() doesn't use stop_machine(),
721          * this doesn't cause deadlock on text_mutex. So, we don't
722          * need get_online_cpus().
723          */
724         mutex_lock(&text_mutex);
725         __arm_kprobe(kp);
726         mutex_unlock(&text_mutex);
727 }
728
729 /* Disarm a kprobe with text_mutex */
730 static void __kprobes disarm_kprobe(struct kprobe *kp)
731 {
732         get_online_cpus();      /* For avoiding text_mutex deadlock */
733         mutex_lock(&text_mutex);
734         __disarm_kprobe(kp);
735         mutex_unlock(&text_mutex);
736         put_online_cpus();
737 }
738
739 /*
740  * Aggregate handlers for multiple kprobes support - these handlers
741  * take care of invoking the individual kprobe handlers on p->list
742  */
743 static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
744 {
745         struct kprobe *kp;
746
747         list_for_each_entry_rcu(kp, &p->list, list) {
748                 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
749                         set_kprobe_instance(kp);
750                         if (kp->pre_handler(kp, regs))
751                                 return 1;
752                 }
753                 reset_kprobe_instance();
754         }
755         return 0;
756 }
757
758 static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
759                                         unsigned long flags)
760 {
761         struct kprobe *kp;
762
763         list_for_each_entry_rcu(kp, &p->list, list) {
764                 if (kp->post_handler && likely(!kprobe_disabled(kp))) {
765                         set_kprobe_instance(kp);
766                         kp->post_handler(kp, regs, flags);
767                         reset_kprobe_instance();
768                 }
769         }
770 }
771
772 static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
773                                         int trapnr)
774 {
775         struct kprobe *cur = __get_cpu_var(kprobe_instance);
776
777         /*
778          * if we faulted "during" the execution of a user specified
779          * probe handler, invoke just that probe's fault handler
780          */
781         if (cur && cur->fault_handler) {
782                 if (cur->fault_handler(cur, regs, trapnr))
783                         return 1;
784         }
785         return 0;
786 }
787
788 static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
789 {
790         struct kprobe *cur = __get_cpu_var(kprobe_instance);
791         int ret = 0;
792
793         if (cur && cur->break_handler) {
794                 if (cur->break_handler(cur, regs))
795                         ret = 1;
796         }
797         reset_kprobe_instance();
798         return ret;
799 }
800
801 /* Walks the list and increments nmissed count for multiprobe case */
802 void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
803 {
804         struct kprobe *kp;
805         if (!kprobe_aggrprobe(p)) {
806                 p->nmissed++;
807         } else {
808                 list_for_each_entry_rcu(kp, &p->list, list)
809                         kp->nmissed++;
810         }
811         return;
812 }
813
814 void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
815                                 struct hlist_head *head)
816 {
817         struct kretprobe *rp = ri->rp;
818
819         /* remove rp inst off the rprobe_inst_table */
820         hlist_del(&ri->hlist);
821         INIT_HLIST_NODE(&ri->hlist);
822         if (likely(rp)) {
823                 spin_lock(&rp->lock);
824                 hlist_add_head(&ri->hlist, &rp->free_instances);
825                 spin_unlock(&rp->lock);
826         } else
827                 /* Unregistering */
828                 hlist_add_head(&ri->hlist, head);
829 }
830
831 void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
832                          struct hlist_head **head, unsigned long *flags)
833 {
834         unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
835         spinlock_t *hlist_lock;
836
837         *head = &kretprobe_inst_table[hash];
838         hlist_lock = kretprobe_table_lock_ptr(hash);
839         spin_lock_irqsave(hlist_lock, *flags);
840 }
841
842 static void __kprobes kretprobe_table_lock(unsigned long hash,
843         unsigned long *flags)
844 {
845         spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
846         spin_lock_irqsave(hlist_lock, *flags);
847 }
848
849 void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
850         unsigned long *flags)
851 {
852         unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
853         spinlock_t *hlist_lock;
854
855         hlist_lock = kretprobe_table_lock_ptr(hash);
856         spin_unlock_irqrestore(hlist_lock, *flags);
857 }
858
859 void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
860 {
861         spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
862         spin_unlock_irqrestore(hlist_lock, *flags);
863 }
864
865 /*
866  * This function is called from finish_task_switch when task tk becomes dead,
867  * so that we can recycle any function-return probe instances associated
868  * with this task. These left over instances represent probed functions
869  * that have been called but will never return.
870  */
871 void __kprobes kprobe_flush_task(struct task_struct *tk)
872 {
873         struct kretprobe_instance *ri;
874         struct hlist_head *head, empty_rp;
875         struct hlist_node *node, *tmp;
876         unsigned long hash, flags = 0;
877
878         if (unlikely(!kprobes_initialized))
879                 /* Early boot.  kretprobe_table_locks not yet initialized. */
880                 return;
881
882         hash = hash_ptr(tk, KPROBE_HASH_BITS);
883         head = &kretprobe_inst_table[hash];
884         kretprobe_table_lock(hash, &flags);
885         hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
886                 if (ri->task == tk)
887                         recycle_rp_inst(ri, &empty_rp);
888         }
889         kretprobe_table_unlock(hash, &flags);
890         INIT_HLIST_HEAD(&empty_rp);
891         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
892                 hlist_del(&ri->hlist);
893                 kfree(ri);
894         }
895 }
896
897 static inline void free_rp_inst(struct kretprobe *rp)
898 {
899         struct kretprobe_instance *ri;
900         struct hlist_node *pos, *next;
901
902         hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
903                 hlist_del(&ri->hlist);
904                 kfree(ri);
905         }
906 }
907
908 static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
909 {
910         unsigned long flags, hash;
911         struct kretprobe_instance *ri;
912         struct hlist_node *pos, *next;
913         struct hlist_head *head;
914
915         /* No race here */
916         for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
917                 kretprobe_table_lock(hash, &flags);
918                 head = &kretprobe_inst_table[hash];
919                 hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
920                         if (ri->rp == rp)
921                                 ri->rp = NULL;
922                 }
923                 kretprobe_table_unlock(hash, &flags);
924         }
925         free_rp_inst(rp);
926 }
927
928 /*
929 * Add the new probe to ap->list. Fail if this is the
930 * second jprobe at the address - two jprobes can't coexist
931 */
932 static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
933 {
934         BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
935
936         if (p->break_handler || p->post_handler)
937                 unoptimize_kprobe(ap);  /* Fall back to normal kprobe */
938
939         if (p->break_handler) {
940                 if (ap->break_handler)
941                         return -EEXIST;
942                 list_add_tail_rcu(&p->list, &ap->list);
943                 ap->break_handler = aggr_break_handler;
944         } else
945                 list_add_rcu(&p->list, &ap->list);
946         if (p->post_handler && !ap->post_handler)
947                 ap->post_handler = aggr_post_handler;
948
949         if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
950                 ap->flags &= ~KPROBE_FLAG_DISABLED;
951                 if (!kprobes_all_disarmed)
952                         /* Arm the breakpoint again. */
953                         __arm_kprobe(ap);
954         }
955         return 0;
956 }
957
958 /*
959  * Fill in the required fields of the "manager kprobe". Replace the
960  * earlier kprobe in the hlist with the manager kprobe
961  */
962 static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
963 {
964         /* Copy p's insn slot to ap */
965         copy_kprobe(p, ap);
966         flush_insn_slot(ap);
967         ap->addr = p->addr;
968         ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
969         ap->pre_handler = aggr_pre_handler;
970         ap->fault_handler = aggr_fault_handler;
971         /* We don't care the kprobe which has gone. */
972         if (p->post_handler && !kprobe_gone(p))
973                 ap->post_handler = aggr_post_handler;
974         if (p->break_handler && !kprobe_gone(p))
975                 ap->break_handler = aggr_break_handler;
976
977         INIT_LIST_HEAD(&ap->list);
978         INIT_HLIST_NODE(&ap->hlist);
979
980         list_add_rcu(&p->list, &ap->list);
981         hlist_replace_rcu(&p->hlist, &ap->hlist);
982 }
983
984 /*
985  * This is the second or subsequent kprobe at the address - handle
986  * the intricacies
987  */
988 static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
989                                           struct kprobe *p)
990 {
991         int ret = 0;
992         struct kprobe *ap = old_p;
993
994         if (!kprobe_aggrprobe(old_p)) {
995                 /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
996                 ap = alloc_aggr_kprobe(old_p);
997                 if (!ap)
998                         return -ENOMEM;
999                 init_aggr_kprobe(ap, old_p);
1000         }
1001
1002         if (kprobe_gone(ap)) {
1003                 /*
1004                  * Attempting to insert new probe at the same location that
1005                  * had a probe in the module vaddr area which already
1006                  * freed. So, the instruction slot has already been
1007                  * released. We need a new slot for the new probe.
1008                  */
1009                 ret = arch_prepare_kprobe(ap);
1010                 if (ret)
1011                         /*
1012                          * Even if fail to allocate new slot, don't need to
1013                          * free aggr_probe. It will be used next time, or
1014                          * freed by unregister_kprobe.
1015                          */
1016                         return ret;
1017
1018                 /* Prepare optimized instructions if possible. */
1019                 prepare_optimized_kprobe(ap);
1020
1021                 /*
1022                  * Clear gone flag to prevent allocating new slot again, and
1023                  * set disabled flag because it is not armed yet.
1024                  */
1025                 ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1026                             | KPROBE_FLAG_DISABLED;
1027         }
1028
1029         /* Copy ap's insn slot to p */
1030         copy_kprobe(ap, p);
1031         return add_new_kprobe(ap, p);
1032 }
1033
1034 /* Try to disable aggr_kprobe, and return 1 if succeeded.*/
1035 static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
1036 {
1037         struct kprobe *kp;
1038
1039         list_for_each_entry_rcu(kp, &p->list, list) {
1040                 if (!kprobe_disabled(kp))
1041                         /*
1042                          * There is an active probe on the list.
1043                          * We can't disable aggr_kprobe.
1044                          */
1045                         return 0;
1046         }
1047         p->flags |= KPROBE_FLAG_DISABLED;
1048         return 1;
1049 }
1050
1051 static int __kprobes in_kprobes_functions(unsigned long addr)
1052 {
1053         struct kprobe_blackpoint *kb;
1054
1055         if (addr >= (unsigned long)__kprobes_text_start &&
1056             addr < (unsigned long)__kprobes_text_end)
1057                 return -EINVAL;
1058         /*
1059          * If there exists a kprobe_blacklist, verify and
1060          * fail any probe registration in the prohibited area
1061          */
1062         for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
1063                 if (kb->start_addr) {
1064                         if (addr >= kb->start_addr &&
1065                             addr < (kb->start_addr + kb->range))
1066                                 return -EINVAL;
1067                 }
1068         }
1069         return 0;
1070 }
1071
1072 /*
1073  * If we have a symbol_name argument, look it up and add the offset field
1074  * to it. This way, we can specify a relative address to a symbol.
1075  */
1076 static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1077 {
1078         kprobe_opcode_t *addr = p->addr;
1079         if (p->symbol_name) {
1080                 if (addr)
1081                         return NULL;
1082                 kprobe_lookup_name(p->symbol_name, addr);
1083         }
1084
1085         if (!addr)
1086                 return NULL;
1087         return (kprobe_opcode_t *)(((char *)addr) + p->offset);
1088 }
1089
1090 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
1091 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
1092 {
1093         struct kprobe *old_p, *list_p;
1094
1095         old_p = get_kprobe(p->addr);
1096         if (unlikely(!old_p))
1097                 return NULL;
1098
1099         if (p != old_p) {
1100                 list_for_each_entry_rcu(list_p, &old_p->list, list)
1101                         if (list_p == p)
1102                         /* kprobe p is a valid probe */
1103                                 goto valid;
1104                 return NULL;
1105         }
1106 valid:
1107         return old_p;
1108 }
1109
1110 /* Return error if the kprobe is being re-registered */
1111 static inline int check_kprobe_rereg(struct kprobe *p)
1112 {
1113         int ret = 0;
1114         struct kprobe *old_p;
1115
1116         mutex_lock(&kprobe_mutex);
1117         old_p = __get_valid_kprobe(p);
1118         if (old_p)
1119                 ret = -EINVAL;
1120         mutex_unlock(&kprobe_mutex);
1121         return ret;
1122 }
1123
1124 int __kprobes register_kprobe(struct kprobe *p)
1125 {
1126         int ret = 0;
1127         struct kprobe *old_p;
1128         struct module *probed_mod;
1129         kprobe_opcode_t *addr;
1130
1131         addr = kprobe_addr(p);
1132         if (!addr)
1133                 return -EINVAL;
1134         p->addr = addr;
1135
1136         ret = check_kprobe_rereg(p);
1137         if (ret)
1138                 return ret;
1139
1140         preempt_disable();
1141         if (!kernel_text_address((unsigned long) p->addr) ||
1142             in_kprobes_functions((unsigned long) p->addr) ||
1143             ftrace_text_reserved(p->addr, p->addr)) {
1144                 preempt_enable();
1145                 return -EINVAL;
1146         }
1147
1148         /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1149         p->flags &= KPROBE_FLAG_DISABLED;
1150
1151         /*
1152          * Check if are we probing a module.
1153          */
1154         probed_mod = __module_text_address((unsigned long) p->addr);
1155         if (probed_mod) {
1156                 /*
1157                  * We must hold a refcount of the probed module while updating
1158                  * its code to prohibit unexpected unloading.
1159                  */
1160                 if (unlikely(!try_module_get(probed_mod))) {
1161                         preempt_enable();
1162                         return -EINVAL;
1163                 }
1164                 /*
1165                  * If the module freed .init.text, we couldn't insert
1166                  * kprobes in there.
1167                  */
1168                 if (within_module_init((unsigned long)p->addr, probed_mod) &&
1169                     probed_mod->state != MODULE_STATE_COMING) {
1170                         module_put(probed_mod);
1171                         preempt_enable();
1172                         return -EINVAL;
1173                 }
1174         }
1175         preempt_enable();
1176
1177         p->nmissed = 0;
1178         INIT_LIST_HEAD(&p->list);
1179         mutex_lock(&kprobe_mutex);
1180
1181         get_online_cpus();      /* For avoiding text_mutex deadlock. */
1182         mutex_lock(&text_mutex);
1183
1184         old_p = get_kprobe(p->addr);
1185         if (old_p) {
1186                 /* Since this may unoptimize old_p, locking text_mutex. */
1187                 ret = register_aggr_kprobe(old_p, p);
1188                 goto out;
1189         }
1190
1191         ret = arch_prepare_kprobe(p);
1192         if (ret)
1193                 goto out;
1194
1195         INIT_HLIST_NODE(&p->hlist);
1196         hlist_add_head_rcu(&p->hlist,
1197                        &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1198
1199         if (!kprobes_all_disarmed && !kprobe_disabled(p))
1200                 __arm_kprobe(p);
1201
1202         /* Try to optimize kprobe */
1203         try_to_optimize_kprobe(p);
1204
1205 out:
1206         mutex_unlock(&text_mutex);
1207         put_online_cpus();
1208         mutex_unlock(&kprobe_mutex);
1209
1210         if (probed_mod)
1211                 module_put(probed_mod);
1212
1213         return ret;
1214 }
1215 EXPORT_SYMBOL_GPL(register_kprobe);
1216
1217 /*
1218  * Unregister a kprobe without a scheduler synchronization.
1219  */
1220 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
1221 {
1222         struct kprobe *old_p, *list_p;
1223
1224         old_p = __get_valid_kprobe(p);
1225         if (old_p == NULL)
1226                 return -EINVAL;
1227
1228         if (old_p == p ||
1229             (kprobe_aggrprobe(old_p) &&
1230              list_is_singular(&old_p->list))) {
1231                 /*
1232                  * Only probe on the hash list. Disarm only if kprobes are
1233                  * enabled and not gone - otherwise, the breakpoint would
1234                  * already have been removed. We save on flushing icache.
1235                  */
1236                 if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
1237                         disarm_kprobe(old_p);
1238                 hlist_del_rcu(&old_p->hlist);
1239         } else {
1240                 if (p->break_handler && !kprobe_gone(p))
1241                         old_p->break_handler = NULL;
1242                 if (p->post_handler && !kprobe_gone(p)) {
1243                         list_for_each_entry_rcu(list_p, &old_p->list, list) {
1244                                 if ((list_p != p) && (list_p->post_handler))
1245                                         goto noclean;
1246                         }
1247                         old_p->post_handler = NULL;
1248                 }
1249 noclean:
1250                 list_del_rcu(&p->list);
1251                 if (!kprobe_disabled(old_p)) {
1252                         try_to_disable_aggr_kprobe(old_p);
1253                         if (!kprobes_all_disarmed) {
1254                                 if (kprobe_disabled(old_p))
1255                                         disarm_kprobe(old_p);
1256                                 else
1257                                         /* Try to optimize this probe again */
1258                                         optimize_kprobe(old_p);
1259                         }
1260                 }
1261         }
1262         return 0;
1263 }
1264
1265 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1266 {
1267         struct kprobe *old_p;
1268
1269         if (list_empty(&p->list))
1270                 arch_remove_kprobe(p);
1271         else if (list_is_singular(&p->list)) {
1272                 /* "p" is the last child of an aggr_kprobe */
1273                 old_p = list_entry(p->list.next, struct kprobe, list);
1274                 list_del(&p->list);
1275                 arch_remove_kprobe(old_p);
1276                 free_aggr_kprobe(old_p);
1277         }
1278 }
1279
1280 int __kprobes register_kprobes(struct kprobe **kps, int num)
1281 {
1282         int i, ret = 0;
1283
1284         if (num <= 0)
1285                 return -EINVAL;
1286         for (i = 0; i < num; i++) {
1287                 ret = register_kprobe(kps[i]);
1288                 if (ret < 0) {
1289                         if (i > 0)
1290                                 unregister_kprobes(kps, i);
1291                         break;
1292                 }
1293         }
1294         return ret;
1295 }
1296 EXPORT_SYMBOL_GPL(register_kprobes);
1297
1298 void __kprobes unregister_kprobe(struct kprobe *p)
1299 {
1300         unregister_kprobes(&p, 1);
1301 }
1302 EXPORT_SYMBOL_GPL(unregister_kprobe);
1303
1304 void __kprobes unregister_kprobes(struct kprobe **kps, int num)
1305 {
1306         int i;
1307
1308         if (num <= 0)
1309                 return;
1310         mutex_lock(&kprobe_mutex);
1311         for (i = 0; i < num; i++)
1312                 if (__unregister_kprobe_top(kps[i]) < 0)
1313                         kps[i]->addr = NULL;
1314         mutex_unlock(&kprobe_mutex);
1315
1316         synchronize_sched();
1317         for (i = 0; i < num; i++)
1318                 if (kps[i]->addr)
1319                         __unregister_kprobe_bottom(kps[i]);
1320 }
1321 EXPORT_SYMBOL_GPL(unregister_kprobes);
1322
1323 static struct notifier_block kprobe_exceptions_nb = {
1324         .notifier_call = kprobe_exceptions_notify,
1325         .priority = 0x7fffffff /* we need to be notified first */
1326 };
1327
1328 unsigned long __weak arch_deref_entry_point(void *entry)
1329 {
1330         return (unsigned long)entry;
1331 }
1332
1333 int __kprobes register_jprobes(struct jprobe **jps, int num)
1334 {
1335         struct jprobe *jp;
1336         int ret = 0, i;
1337
1338         if (num <= 0)
1339                 return -EINVAL;
1340         for (i = 0; i < num; i++) {
1341                 unsigned long addr;
1342                 jp = jps[i];
1343                 addr = arch_deref_entry_point(jp->entry);
1344
1345                 if (!kernel_text_address(addr))
1346                         ret = -EINVAL;
1347                 else {
1348                         /* Todo: Verify probepoint is a function entry point */
1349                         jp->kp.pre_handler = setjmp_pre_handler;
1350                         jp->kp.break_handler = longjmp_break_handler;
1351                         ret = register_kprobe(&jp->kp);
1352                 }
1353                 if (ret < 0) {
1354                         if (i > 0)
1355                                 unregister_jprobes(jps, i);
1356                         break;
1357                 }
1358         }
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(register_jprobes);
1362
1363 int __kprobes register_jprobe(struct jprobe *jp)
1364 {
1365         return register_jprobes(&jp, 1);
1366 }
1367 EXPORT_SYMBOL_GPL(register_jprobe);
1368
1369 void __kprobes unregister_jprobe(struct jprobe *jp)
1370 {
1371         unregister_jprobes(&jp, 1);
1372 }
1373 EXPORT_SYMBOL_GPL(unregister_jprobe);
1374
1375 void __kprobes unregister_jprobes(struct jprobe **jps, int num)
1376 {
1377         int i;
1378
1379         if (num <= 0)
1380                 return;
1381         mutex_lock(&kprobe_mutex);
1382         for (i = 0; i < num; i++)
1383                 if (__unregister_kprobe_top(&jps[i]->kp) < 0)
1384                         jps[i]->kp.addr = NULL;
1385         mutex_unlock(&kprobe_mutex);
1386
1387         synchronize_sched();
1388         for (i = 0; i < num; i++) {
1389                 if (jps[i]->kp.addr)
1390                         __unregister_kprobe_bottom(&jps[i]->kp);
1391         }
1392 }
1393 EXPORT_SYMBOL_GPL(unregister_jprobes);
1394
1395 #ifdef CONFIG_KRETPROBES
1396 /*
1397  * This kprobe pre_handler is registered with every kretprobe. When probe
1398  * hits it will set up the return probe.
1399  */
1400 static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1401                                            struct pt_regs *regs)
1402 {
1403         struct kretprobe *rp = container_of(p, struct kretprobe, kp);
1404         unsigned long hash, flags = 0;
1405         struct kretprobe_instance *ri;
1406
1407         /*TODO: consider to only swap the RA after the last pre_handler fired */
1408         hash = hash_ptr(current, KPROBE_HASH_BITS);
1409         spin_lock_irqsave(&rp->lock, flags);
1410         if (!hlist_empty(&rp->free_instances)) {
1411                 ri = hlist_entry(rp->free_instances.first,
1412                                 struct kretprobe_instance, hlist);
1413                 hlist_del(&ri->hlist);
1414                 spin_unlock_irqrestore(&rp->lock, flags);
1415
1416                 ri->rp = rp;
1417                 ri->task = current;
1418
1419                 if (rp->entry_handler && rp->entry_handler(ri, regs))
1420                         return 0;
1421
1422                 arch_prepare_kretprobe(ri, regs);
1423
1424                 /* XXX(hch): why is there no hlist_move_head? */
1425                 INIT_HLIST_NODE(&ri->hlist);
1426                 kretprobe_table_lock(hash, &flags);
1427                 hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
1428                 kretprobe_table_unlock(hash, &flags);
1429         } else {
1430                 rp->nmissed++;
1431                 spin_unlock_irqrestore(&rp->lock, flags);
1432         }
1433         return 0;
1434 }
1435
1436 int __kprobes register_kretprobe(struct kretprobe *rp)
1437 {
1438         int ret = 0;
1439         struct kretprobe_instance *inst;
1440         int i;
1441         void *addr;
1442
1443         if (kretprobe_blacklist_size) {
1444                 addr = kprobe_addr(&rp->kp);
1445                 if (!addr)
1446                         return -EINVAL;
1447
1448                 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1449                         if (kretprobe_blacklist[i].addr == addr)
1450                                 return -EINVAL;
1451                 }
1452         }
1453
1454         rp->kp.pre_handler = pre_handler_kretprobe;
1455         rp->kp.post_handler = NULL;
1456         rp->kp.fault_handler = NULL;
1457         rp->kp.break_handler = NULL;
1458
1459         /* Pre-allocate memory for max kretprobe instances */
1460         if (rp->maxactive <= 0) {
1461 #ifdef CONFIG_PREEMPT
1462                 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
1463 #else
1464                 rp->maxactive = num_possible_cpus();
1465 #endif
1466         }
1467         spin_lock_init(&rp->lock);
1468         INIT_HLIST_HEAD(&rp->free_instances);
1469         for (i = 0; i < rp->maxactive; i++) {
1470                 inst = kmalloc(sizeof(struct kretprobe_instance) +
1471                                rp->data_size, GFP_KERNEL);
1472                 if (inst == NULL) {
1473                         free_rp_inst(rp);
1474                         return -ENOMEM;
1475                 }
1476                 INIT_HLIST_NODE(&inst->hlist);
1477                 hlist_add_head(&inst->hlist, &rp->free_instances);
1478         }
1479
1480         rp->nmissed = 0;
1481         /* Establish function entry probe point */
1482         ret = register_kprobe(&rp->kp);
1483         if (ret != 0)
1484                 free_rp_inst(rp);
1485         return ret;
1486 }
1487 EXPORT_SYMBOL_GPL(register_kretprobe);
1488
1489 int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1490 {
1491         int ret = 0, i;
1492
1493         if (num <= 0)
1494                 return -EINVAL;
1495         for (i = 0; i < num; i++) {
1496                 ret = register_kretprobe(rps[i]);
1497                 if (ret < 0) {
1498                         if (i > 0)
1499                                 unregister_kretprobes(rps, i);
1500                         break;
1501                 }
1502         }
1503         return ret;
1504 }
1505 EXPORT_SYMBOL_GPL(register_kretprobes);
1506
1507 void __kprobes unregister_kretprobe(struct kretprobe *rp)
1508 {
1509         unregister_kretprobes(&rp, 1);
1510 }
1511 EXPORT_SYMBOL_GPL(unregister_kretprobe);
1512
1513 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1514 {
1515         int i;
1516
1517         if (num <= 0)
1518                 return;
1519         mutex_lock(&kprobe_mutex);
1520         for (i = 0; i < num; i++)
1521                 if (__unregister_kprobe_top(&rps[i]->kp) < 0)
1522                         rps[i]->kp.addr = NULL;
1523         mutex_unlock(&kprobe_mutex);
1524
1525         synchronize_sched();
1526         for (i = 0; i < num; i++) {
1527                 if (rps[i]->kp.addr) {
1528                         __unregister_kprobe_bottom(&rps[i]->kp);
1529                         cleanup_rp_inst(rps[i]);
1530                 }
1531         }
1532 }
1533 EXPORT_SYMBOL_GPL(unregister_kretprobes);
1534
1535 #else /* CONFIG_KRETPROBES */
1536 int __kprobes register_kretprobe(struct kretprobe *rp)
1537 {
1538         return -ENOSYS;
1539 }
1540 EXPORT_SYMBOL_GPL(register_kretprobe);
1541
1542 int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1543 {
1544         return -ENOSYS;
1545 }
1546 EXPORT_SYMBOL_GPL(register_kretprobes);
1547
1548 void __kprobes unregister_kretprobe(struct kretprobe *rp)
1549 {
1550 }
1551 EXPORT_SYMBOL_GPL(unregister_kretprobe);
1552
1553 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1554 {
1555 }
1556 EXPORT_SYMBOL_GPL(unregister_kretprobes);
1557
1558 static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1559                                            struct pt_regs *regs)
1560 {
1561         return 0;
1562 }
1563
1564 #endif /* CONFIG_KRETPROBES */
1565
1566 /* Set the kprobe gone and remove its instruction buffer. */
1567 static void __kprobes kill_kprobe(struct kprobe *p)
1568 {
1569         struct kprobe *kp;
1570
1571         p->flags |= KPROBE_FLAG_GONE;
1572         if (kprobe_aggrprobe(p)) {
1573                 /*
1574                  * If this is an aggr_kprobe, we have to list all the
1575                  * chained probes and mark them GONE.
1576                  */
1577                 list_for_each_entry_rcu(kp, &p->list, list)
1578                         kp->flags |= KPROBE_FLAG_GONE;
1579                 p->post_handler = NULL;
1580                 p->break_handler = NULL;
1581                 kill_optimized_kprobe(p);
1582         }
1583         /*
1584          * Here, we can remove insn_slot safely, because no thread calls
1585          * the original probed function (which will be freed soon) any more.
1586          */
1587         arch_remove_kprobe(p);
1588 }
1589
1590 void __kprobes dump_kprobe(struct kprobe *kp)
1591 {
1592         printk(KERN_WARNING "Dumping kprobe:\n");
1593         printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
1594                kp->symbol_name, kp->addr, kp->offset);
1595 }
1596
1597 /* Module notifier call back, checking kprobes on the module */
1598 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1599                                              unsigned long val, void *data)
1600 {
1601         struct module *mod = data;
1602         struct hlist_head *head;
1603         struct hlist_node *node;
1604         struct kprobe *p;
1605         unsigned int i;
1606         int checkcore = (val == MODULE_STATE_GOING);
1607
1608         if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
1609                 return NOTIFY_DONE;
1610
1611         /*
1612          * When MODULE_STATE_GOING was notified, both of module .text and
1613          * .init.text sections would be freed. When MODULE_STATE_LIVE was
1614          * notified, only .init.text section would be freed. We need to
1615          * disable kprobes which have been inserted in the sections.
1616          */
1617         mutex_lock(&kprobe_mutex);
1618         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1619                 head = &kprobe_table[i];
1620                 hlist_for_each_entry_rcu(p, node, head, hlist)
1621                         if (within_module_init((unsigned long)p->addr, mod) ||
1622                             (checkcore &&
1623                              within_module_core((unsigned long)p->addr, mod))) {
1624                                 /*
1625                                  * The vaddr this probe is installed will soon
1626                                  * be vfreed buy not synced to disk. Hence,
1627                                  * disarming the breakpoint isn't needed.
1628                                  */
1629                                 kill_kprobe(p);
1630                         }
1631         }
1632         mutex_unlock(&kprobe_mutex);
1633         return NOTIFY_DONE;
1634 }
1635
1636 static struct notifier_block kprobe_module_nb = {
1637         .notifier_call = kprobes_module_callback,
1638         .priority = 0
1639 };
1640
1641 static int __init init_kprobes(void)
1642 {
1643         int i, err = 0;
1644         unsigned long offset = 0, size = 0;
1645         char *modname, namebuf[128];
1646         const char *symbol_name;
1647         void *addr;
1648         struct kprobe_blackpoint *kb;
1649
1650         /* FIXME allocate the probe table, currently defined statically */
1651         /* initialize all list heads */
1652         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1653                 INIT_HLIST_HEAD(&kprobe_table[i]);
1654                 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
1655                 spin_lock_init(&(kretprobe_table_locks[i].lock));
1656         }
1657
1658         /*
1659          * Lookup and populate the kprobe_blacklist.
1660          *
1661          * Unlike the kretprobe blacklist, we'll need to determine
1662          * the range of addresses that belong to the said functions,
1663          * since a kprobe need not necessarily be at the beginning
1664          * of a function.
1665          */
1666         for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
1667                 kprobe_lookup_name(kb->name, addr);
1668                 if (!addr)
1669                         continue;
1670
1671                 kb->start_addr = (unsigned long)addr;
1672                 symbol_name = kallsyms_lookup(kb->start_addr,
1673                                 &size, &offset, &modname, namebuf);
1674                 if (!symbol_name)
1675                         kb->range = 0;
1676                 else
1677                         kb->range = size;
1678         }
1679
1680         if (kretprobe_blacklist_size) {
1681                 /* lookup the function address from its name */
1682                 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1683                         kprobe_lookup_name(kretprobe_blacklist[i].name,
1684                                            kretprobe_blacklist[i].addr);
1685                         if (!kretprobe_blacklist[i].addr)
1686                                 printk("kretprobe: lookup failed: %s\n",
1687                                        kretprobe_blacklist[i].name);
1688                 }
1689         }
1690
1691 #if defined(CONFIG_OPTPROBES)
1692 #if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
1693         /* Init kprobe_optinsn_slots */
1694         kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
1695 #endif
1696         /* By default, kprobes can be optimized */
1697         kprobes_allow_optimization = true;
1698 #endif
1699
1700         /* By default, kprobes are armed */
1701         kprobes_all_disarmed = false;
1702
1703         err = arch_init_kprobes();
1704         if (!err)
1705                 err = register_die_notifier(&kprobe_exceptions_nb);
1706         if (!err)
1707                 err = register_module_notifier(&kprobe_module_nb);
1708
1709         kprobes_initialized = (err == 0);
1710
1711         if (!err)
1712                 init_test_probes();
1713         return err;
1714 }
1715
1716 #ifdef CONFIG_DEBUG_FS
1717 static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1718                 const char *sym, int offset, char *modname, struct kprobe *pp)
1719 {
1720         char *kprobe_type;
1721
1722         if (p->pre_handler == pre_handler_kretprobe)
1723                 kprobe_type = "r";
1724         else if (p->pre_handler == setjmp_pre_handler)
1725                 kprobe_type = "j";
1726         else
1727                 kprobe_type = "k";
1728
1729         if (sym)
1730                 seq_printf(pi, "%p  %s  %s+0x%x  %s ",
1731                         p->addr, kprobe_type, sym, offset,
1732                         (modname ? modname : " "));
1733         else
1734                 seq_printf(pi, "%p  %s  %p ",
1735                         p->addr, kprobe_type, p->addr);
1736
1737         if (!pp)
1738                 pp = p;
1739         seq_printf(pi, "%s%s%s\n",
1740                 (kprobe_gone(p) ? "[GONE]" : ""),
1741                 ((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
1742                 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
1743 }
1744
1745 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
1746 {
1747         return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
1748 }
1749
1750 static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
1751 {
1752         (*pos)++;
1753         if (*pos >= KPROBE_TABLE_SIZE)
1754                 return NULL;
1755         return pos;
1756 }
1757
1758 static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
1759 {
1760         /* Nothing to do */
1761 }
1762
1763 static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
1764 {
1765         struct hlist_head *head;
1766         struct hlist_node *node;
1767         struct kprobe *p, *kp;
1768         const char *sym = NULL;
1769         unsigned int i = *(loff_t *) v;
1770         unsigned long offset = 0;
1771         char *modname, namebuf[128];
1772
1773         head = &kprobe_table[i];
1774         preempt_disable();
1775         hlist_for_each_entry_rcu(p, node, head, hlist) {
1776                 sym = kallsyms_lookup((unsigned long)p->addr, NULL,
1777                                         &offset, &modname, namebuf);
1778                 if (kprobe_aggrprobe(p)) {
1779                         list_for_each_entry_rcu(kp, &p->list, list)
1780                                 report_probe(pi, kp, sym, offset, modname, p);
1781                 } else
1782                         report_probe(pi, p, sym, offset, modname, NULL);
1783         }
1784         preempt_enable();
1785         return 0;
1786 }
1787
1788 static const struct seq_operations kprobes_seq_ops = {
1789         .start = kprobe_seq_start,
1790         .next  = kprobe_seq_next,
1791         .stop  = kprobe_seq_stop,
1792         .show  = show_kprobe_addr
1793 };
1794
1795 static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
1796 {
1797         return seq_open(filp, &kprobes_seq_ops);
1798 }
1799
1800 static const struct file_operations debugfs_kprobes_operations = {
1801         .open           = kprobes_open,
1802         .read           = seq_read,
1803         .llseek         = seq_lseek,
1804         .release        = seq_release,
1805 };
1806
1807 /* Disable one kprobe */
1808 int __kprobes disable_kprobe(struct kprobe *kp)
1809 {
1810         int ret = 0;
1811         struct kprobe *p;
1812
1813         mutex_lock(&kprobe_mutex);
1814
1815         /* Check whether specified probe is valid. */
1816         p = __get_valid_kprobe(kp);
1817         if (unlikely(p == NULL)) {
1818                 ret = -EINVAL;
1819                 goto out;
1820         }
1821
1822         /* If the probe is already disabled (or gone), just return */
1823         if (kprobe_disabled(kp))
1824                 goto out;
1825
1826         kp->flags |= KPROBE_FLAG_DISABLED;
1827         if (p != kp)
1828                 /* When kp != p, p is always enabled. */
1829                 try_to_disable_aggr_kprobe(p);
1830
1831         if (!kprobes_all_disarmed && kprobe_disabled(p))
1832                 disarm_kprobe(p);
1833 out:
1834         mutex_unlock(&kprobe_mutex);
1835         return ret;
1836 }
1837 EXPORT_SYMBOL_GPL(disable_kprobe);
1838
1839 /* Enable one kprobe */
1840 int __kprobes enable_kprobe(struct kprobe *kp)
1841 {
1842         int ret = 0;
1843         struct kprobe *p;
1844
1845         mutex_lock(&kprobe_mutex);
1846
1847         /* Check whether specified probe is valid. */
1848         p = __get_valid_kprobe(kp);
1849         if (unlikely(p == NULL)) {
1850                 ret = -EINVAL;
1851                 goto out;
1852         }
1853
1854         if (kprobe_gone(kp)) {
1855                 /* This kprobe has gone, we couldn't enable it. */
1856                 ret = -EINVAL;
1857                 goto out;
1858         }
1859
1860         if (p != kp)
1861                 kp->flags &= ~KPROBE_FLAG_DISABLED;
1862
1863         if (!kprobes_all_disarmed && kprobe_disabled(p)) {
1864                 p->flags &= ~KPROBE_FLAG_DISABLED;
1865                 arm_kprobe(p);
1866         }
1867 out:
1868         mutex_unlock(&kprobe_mutex);
1869         return ret;
1870 }
1871 EXPORT_SYMBOL_GPL(enable_kprobe);
1872
1873 static void __kprobes arm_all_kprobes(void)
1874 {
1875         struct hlist_head *head;
1876         struct hlist_node *node;
1877         struct kprobe *p;
1878         unsigned int i;
1879
1880         mutex_lock(&kprobe_mutex);
1881
1882         /* If kprobes are armed, just return */
1883         if (!kprobes_all_disarmed)
1884                 goto already_enabled;
1885
1886         /* Arming kprobes doesn't optimize kprobe itself */
1887         mutex_lock(&text_mutex);
1888         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1889                 head = &kprobe_table[i];
1890                 hlist_for_each_entry_rcu(p, node, head, hlist)
1891                         if (!kprobe_disabled(p))
1892                                 __arm_kprobe(p);
1893         }
1894         mutex_unlock(&text_mutex);
1895
1896         kprobes_all_disarmed = false;
1897         printk(KERN_INFO "Kprobes globally enabled\n");
1898
1899 already_enabled:
1900         mutex_unlock(&kprobe_mutex);
1901         return;
1902 }
1903
1904 static void __kprobes disarm_all_kprobes(void)
1905 {
1906         struct hlist_head *head;
1907         struct hlist_node *node;
1908         struct kprobe *p;
1909         unsigned int i;
1910
1911         mutex_lock(&kprobe_mutex);
1912
1913         /* If kprobes are already disarmed, just return */
1914         if (kprobes_all_disarmed)
1915                 goto already_disabled;
1916
1917         kprobes_all_disarmed = true;
1918         printk(KERN_INFO "Kprobes globally disabled\n");
1919
1920         /*
1921          * Here we call get_online_cpus() for avoiding text_mutex deadlock,
1922          * because disarming may also unoptimize kprobes.
1923          */
1924         get_online_cpus();
1925         mutex_lock(&text_mutex);
1926         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1927                 head = &kprobe_table[i];
1928                 hlist_for_each_entry_rcu(p, node, head, hlist) {
1929                         if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1930                                 __disarm_kprobe(p);
1931                 }
1932         }
1933
1934         mutex_unlock(&text_mutex);
1935         put_online_cpus();
1936         mutex_unlock(&kprobe_mutex);
1937         /* Allow all currently running kprobes to complete */
1938         synchronize_sched();
1939         return;
1940
1941 already_disabled:
1942         mutex_unlock(&kprobe_mutex);
1943         return;
1944 }
1945
1946 /*
1947  * XXX: The debugfs bool file interface doesn't allow for callbacks
1948  * when the bool state is switched. We can reuse that facility when
1949  * available
1950  */
1951 static ssize_t read_enabled_file_bool(struct file *file,
1952                char __user *user_buf, size_t count, loff_t *ppos)
1953 {
1954         char buf[3];
1955
1956         if (!kprobes_all_disarmed)
1957                 buf[0] = '1';
1958         else
1959                 buf[0] = '0';
1960         buf[1] = '\n';
1961         buf[2] = 0x00;
1962         return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
1963 }
1964
1965 static ssize_t write_enabled_file_bool(struct file *file,
1966                const char __user *user_buf, size_t count, loff_t *ppos)
1967 {
1968         char buf[32];
1969         int buf_size;
1970
1971         buf_size = min(count, (sizeof(buf)-1));
1972         if (copy_from_user(buf, user_buf, buf_size))
1973                 return -EFAULT;
1974
1975         switch (buf[0]) {
1976         case 'y':
1977         case 'Y':
1978         case '1':
1979                 arm_all_kprobes();
1980                 break;
1981         case 'n':
1982         case 'N':
1983         case '0':
1984                 disarm_all_kprobes();
1985                 break;
1986         }
1987
1988         return count;
1989 }
1990
1991 static const struct file_operations fops_kp = {
1992         .read =         read_enabled_file_bool,
1993         .write =        write_enabled_file_bool,
1994 };
1995
1996 static int __kprobes debugfs_kprobe_init(void)
1997 {
1998         struct dentry *dir, *file;
1999         unsigned int value = 1;
2000
2001         dir = debugfs_create_dir("kprobes", NULL);
2002         if (!dir)
2003                 return -ENOMEM;
2004
2005         file = debugfs_create_file("list", 0444, dir, NULL,
2006                                 &debugfs_kprobes_operations);
2007         if (!file) {
2008                 debugfs_remove(dir);
2009                 return -ENOMEM;
2010         }
2011
2012         file = debugfs_create_file("enabled", 0600, dir,
2013                                         &value, &fops_kp);
2014         if (!file) {
2015                 debugfs_remove(dir);
2016                 return -ENOMEM;
2017         }
2018
2019         return 0;
2020 }
2021
2022 late_initcall(debugfs_kprobe_init);
2023 #endif /* CONFIG_DEBUG_FS */
2024
/* Register init_kprobes() as this file's initialization function. */
module_init(init_kprobes);

/* jprobe_return() itself is defined in arch/.../kernel/kprobes.c */
EXPORT_SYMBOL_GPL(jprobe_return);