sched: fix buddie group latency
[safe/jmp/linux-2.6] / kernel / kprobes.c
index a1e233a..7ba8cd9 100644 (file)
@@ -123,7 +123,7 @@ static int collect_garbage_slots(void);
 static int __kprobes check_safety(void)
 {
        int ret = 0;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
        ret = freeze_processes();
        if (ret == 0) {
                struct task_struct *p, *q;
@@ -327,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
        struct kprobe *kp;
 
        list_for_each_entry_rcu(kp, &p->list, list) {
-               if (kp->pre_handler) {
+               if (kp->pre_handler && !kprobe_gone(kp)) {
                        set_kprobe_instance(kp);
                        if (kp->pre_handler(kp, regs))
                                return 1;
@@ -343,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
        struct kprobe *kp;
 
        list_for_each_entry_rcu(kp, &p->list, list) {
-               if (kp->post_handler) {
+               if (kp->post_handler && !kprobe_gone(kp)) {
                        set_kprobe_instance(kp);
                        kp->post_handler(kp, regs, flags);
                        reset_kprobe_instance();
@@ -545,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
        ap->addr = p->addr;
        ap->pre_handler = aggr_pre_handler;
        ap->fault_handler = aggr_fault_handler;
-       if (p->post_handler)
+       /* We don't care about a kprobe which has gone. */
+       if (p->post_handler && !kprobe_gone(p))
                ap->post_handler = aggr_post_handler;
-       if (p->break_handler)
+       if (p->break_handler && !kprobe_gone(p))
                ap->break_handler = aggr_break_handler;
 
        INIT_LIST_HEAD(&ap->list);
@@ -566,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
        int ret = 0;
        struct kprobe *ap;
 
+       if (kprobe_gone(old_p)) {
+               /*
+                * Attempting to insert a new probe at the same location
+                * that had a probe in a module vaddr area which has already
+                * been freed. So, the instruction slot has already been
+                * released. We need a new slot for the new probe.
+                */
+               ret = arch_prepare_kprobe(old_p);
+               if (ret)
+                       return ret;
+       }
        if (old_p->pre_handler == aggr_pre_handler) {
                copy_kprobe(old_p, p);
                ret = add_new_kprobe(old_p, p);
+               ap = old_p;
        } else {
                ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
-               if (!ap)
+               if (!ap) {
+                       if (kprobe_gone(old_p))
+                               arch_remove_kprobe(old_p);
                        return -ENOMEM;
+               }
                add_aggr_kprobe(ap, old_p);
                copy_kprobe(ap, p);
                ret = add_new_kprobe(ap, p);
        }
+       if (kprobe_gone(old_p)) {
+               /*
+                * If the old_p has gone, its breakpoint has been disarmed.
+                * We have to arm it again after preparing real kprobes.
+                */
+               ap->flags &= ~KPROBE_FLAG_GONE;
+               if (kprobe_enabled)
+                       arch_arm_kprobe(ap);
+       }
        return ret;
 }
 
@@ -619,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
        return (kprobe_opcode_t *)(((char *)addr) + p->offset);
 }
 
-static int __kprobes __register_kprobe(struct kprobe *p,
-       unsigned long called_from)
+int __kprobes register_kprobe(struct kprobe *p)
 {
        int ret = 0;
        struct kprobe *old_p;
@@ -639,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p,
                return -EINVAL;
        }
 
-       p->mod_refcounted = 0;
-
+       p->flags = 0;
        /*
         * Check if are we probing a module.
         */
        probed_mod = __module_text_address((unsigned long) p->addr);
        if (probed_mod) {
-               struct module *calling_mod;
-               calling_mod = __module_text_address(called_from);
                /*
-                * We must allow modules to probe themself and in this case
-                * avoid incrementing the module refcount, so as to allow
-                * unloading of self probing modules.
+                * We must hold a refcount of the probed module while updating
+                * its code to prohibit unexpected unloading.
                 */
-               if (calling_mod != probed_mod) {
-                       if (unlikely(!try_module_get(probed_mod))) {
-                               preempt_enable();
-                               return -EINVAL;
-                       }
-                       p->mod_refcounted = 1;
-               } else
-                       probed_mod = NULL;
+               if (unlikely(!try_module_get(probed_mod))) {
+                       preempt_enable();
+                       return -EINVAL;
+               }
+               /*
+                * If the module has already freed .init.text, we can't
+                * insert kprobes in there.
+                */
+               if (within_module_init((unsigned long)p->addr, probed_mod) &&
+                   probed_mod->state != MODULE_STATE_COMING) {
+                       module_put(probed_mod);
+                       preempt_enable();
+                       return -EINVAL;
+               }
        }
        preempt_enable();
 
@@ -687,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 out:
        mutex_unlock(&kprobe_mutex);
 
-       if (ret && probed_mod)
+       if (probed_mod)
                module_put(probed_mod);
+
        return ret;
 }
 
@@ -716,16 +743,16 @@ valid_p:
             list_is_singular(&old_p->list))) {
                /*
                 * Only probe on the hash list. Disarm only if kprobes are
-                * enabled - otherwise, the breakpoint would already have
-                * been removed. We save on flushing icache.
+                * enabled and not gone - otherwise, the breakpoint would
+                * already have been removed. We save on flushing icache.
                 */
-               if (kprobe_enabled)
+               if (kprobe_enabled && !kprobe_gone(old_p))
                        arch_disarm_kprobe(p);
                hlist_del_rcu(&old_p->hlist);
        } else {
-               if (p->break_handler)
+               if (p->break_handler && !kprobe_gone(p))
                        old_p->break_handler = NULL;
-               if (p->post_handler) {
+               if (p->post_handler && !kprobe_gone(p)) {
                        list_for_each_entry_rcu(list_p, &old_p->list, list) {
                                if ((list_p != p) && (list_p->post_handler))
                                        goto noclean;
@@ -740,39 +767,27 @@ noclean:
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-       struct module *mod;
        struct kprobe *old_p;
 
-       if (p->mod_refcounted) {
-               /*
-                * Since we've already incremented refcount,
-                * we don't need to disable preemption.
-                */
-               mod = module_text_address((unsigned long)p->addr);
-               if (mod)
-                       module_put(mod);
-       }
-
-       if (list_empty(&p->list) || list_is_singular(&p->list)) {
-               if (!list_empty(&p->list)) {
-                       /* "p" is the last child of an aggr_kprobe */
-                       old_p = list_entry(p->list.next, struct kprobe, list);
-                       list_del(&p->list);
-                       kfree(old_p);
-               }
+       if (list_empty(&p->list))
                arch_remove_kprobe(p);
+       else if (list_is_singular(&p->list)) {
+               /* "p" is the last child of an aggr_kprobe */
+               old_p = list_entry(p->list.next, struct kprobe, list);
+               list_del(&p->list);
+               arch_remove_kprobe(old_p);
+               kfree(old_p);
        }
 }
 
-static int __kprobes __register_kprobes(struct kprobe **kps, int num,
-       unsigned long called_from)
+int __kprobes register_kprobes(struct kprobe **kps, int num)
 {
        int i, ret = 0;
 
        if (num <= 0)
                return -EINVAL;
        for (i = 0; i < num; i++) {
-               ret = __register_kprobe(kps[i], called_from);
+               ret = register_kprobe(kps[i]);
                if (ret < 0) {
                        if (i > 0)
                                unregister_kprobes(kps, i);
@@ -782,26 +797,11 @@ static int __kprobes __register_kprobes(struct kprobe **kps, int num,
        return ret;
 }
 
-/*
- * Registration and unregistration functions for kprobe.
- */
-int __kprobes register_kprobe(struct kprobe *p)
-{
-       return __register_kprobes(&p, 1,
-                                 (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kprobe(struct kprobe *p)
 {
        unregister_kprobes(&p, 1);
 }
 
-int __kprobes register_kprobes(struct kprobe **kps, int num)
-{
-       return __register_kprobes(kps, num,
-                                 (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 {
        int i;
@@ -830,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
        return (unsigned long)entry;
 }
 
-static int __kprobes __register_jprobes(struct jprobe **jps, int num,
-       unsigned long called_from)
+int __kprobes register_jprobes(struct jprobe **jps, int num)
 {
        struct jprobe *jp;
        int ret = 0, i;
@@ -849,7 +848,7 @@ static int __kprobes __register_jprobes(struct jprobe **jps, int num,
                        /* Todo: Verify probepoint is a function entry point */
                        jp->kp.pre_handler = setjmp_pre_handler;
                        jp->kp.break_handler = longjmp_break_handler;
-                       ret = __register_kprobe(&jp->kp, called_from);
+                       ret = register_kprobe(&jp->kp);
                }
                if (ret < 0) {
                        if (i > 0)
@@ -862,8 +861,7 @@ static int __kprobes __register_jprobes(struct jprobe **jps, int num,
 
 int __kprobes register_jprobe(struct jprobe *jp)
 {
-       return __register_jprobes(&jp, 1,
-               (unsigned long)__builtin_return_address(0));
+       return register_jprobes(&jp, 1);
 }
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
@@ -871,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
        unregister_jprobes(&jp, 1);
 }
 
-int __kprobes register_jprobes(struct jprobe **jps, int num)
-{
-       return __register_jprobes(jps, num,
-               (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 {
        int i;
@@ -939,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
        return 0;
 }
 
-static int __kprobes __register_kretprobe(struct kretprobe *rp,
-                                         unsigned long called_from)
+int __kprobes register_kretprobe(struct kretprobe *rp)
 {
        int ret = 0;
        struct kretprobe_instance *inst;
@@ -986,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
 
        rp->nmissed = 0;
        /* Establish function entry probe point */
-       ret = __register_kprobe(&rp->kp, called_from);
+       ret = register_kprobe(&rp->kp);
        if (ret != 0)
                free_rp_inst(rp);
        return ret;
 }
 
-static int __kprobes __register_kretprobes(struct kretprobe **rps, int num,
-       unsigned long called_from)
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
        int ret = 0, i;
 
        if (num <= 0)
                return -EINVAL;
        for (i = 0; i < num; i++) {
-               ret = __register_kretprobe(rps[i], called_from);
+               ret = register_kretprobe(rps[i]);
                if (ret < 0) {
                        if (i > 0)
                                unregister_kretprobes(rps, i);
@@ -1010,23 +1000,11 @@ static int __kprobes __register_kretprobes(struct kretprobe **rps, int num,
        return ret;
 }
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
-{
-       return __register_kretprobes(&rp, 1,
-                       (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
        unregister_kretprobes(&rp, 1);
 }
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
-{
-       return __register_kretprobes(rps, num,
-                       (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 {
        int i;
@@ -1074,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 
 #endif /* CONFIG_KRETPROBES */
 
+/* Set the kprobe gone and remove its instruction buffer. */
+static void __kprobes kill_kprobe(struct kprobe *p)
+{
+       struct kprobe *kp;
+       p->flags |= KPROBE_FLAG_GONE;
+       if (p->pre_handler == aggr_pre_handler) {
+               /*
+                * If this is an aggr_kprobe, we have to list all the
+                * chained probes and mark them GONE.
+                */
+               list_for_each_entry_rcu(kp, &p->list, list)
+                       kp->flags |= KPROBE_FLAG_GONE;
+               p->post_handler = NULL;
+               p->break_handler = NULL;
+       }
+       /*
+        * Here, we can remove insn_slot safely, because no thread calls
+        * the original probed function (which will be freed soon) any more.
+        */
+       arch_remove_kprobe(p);
+}
+
+/* Module notifier callback, checking kprobes on the module */
+static int __kprobes kprobes_module_callback(struct notifier_block *nb,
+                                            unsigned long val, void *data)
+{
+       struct module *mod = data;
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct kprobe *p;
+       unsigned int i;
+       int checkcore = (val == MODULE_STATE_GOING);
+
+       if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
+               return NOTIFY_DONE;
+
+       /*
+        * When MODULE_STATE_GOING is notified, both the module's .text and
+        * .init.text sections will be freed. When MODULE_STATE_LIVE is
+        * notified, only the .init.text section will be freed. We need to
+        * disable kprobes which have been inserted in those sections.
+        */
+       mutex_lock(&kprobe_mutex);
+       for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+               head = &kprobe_table[i];
+               hlist_for_each_entry_rcu(p, node, head, hlist)
+                       if (within_module_init((unsigned long)p->addr, mod) ||
+                           (checkcore &&
+                            within_module_core((unsigned long)p->addr, mod))) {
+                               /*
+                                * The vaddr where this probe is installed will
+                                * soon be vfreed but not synced to disk. Hence,
+                                * disarming the breakpoint isn't needed.
+                                */
+                               kill_kprobe(p);
+                       }
+       }
+       mutex_unlock(&kprobe_mutex);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block kprobe_module_nb = {
+       .notifier_call = kprobes_module_callback,
+       .priority = 0
+};
+
 static int __init init_kprobes(void)
 {
        int i, err = 0;
@@ -1130,6 +1174,9 @@ static int __init init_kprobes(void)
        err = arch_init_kprobes();
        if (!err)
                err = register_die_notifier(&kprobe_exceptions_nb);
+       if (!err)
+               err = register_module_notifier(&kprobe_module_nb);
+
        kprobes_initialized = (err == 0);
 
        if (!err)
@@ -1150,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
        else
                kprobe_type = "k";
        if (sym)
-               seq_printf(pi, "%p  %s  %s+0x%x  %s\n", p->addr, kprobe_type,
-                       sym, offset, (modname ? modname : " "));
+               seq_printf(pi, "%p  %s  %s+0x%x  %s %s\n", p->addr, kprobe_type,
+                       sym, offset, (modname ? modname : " "),
+                       (kprobe_gone(p) ? "[GONE]" : ""));
        else
-               seq_printf(pi, "%p  %s  %p\n", p->addr, kprobe_type, p->addr);
+               seq_printf(pi, "%p  %s  %p %s\n", p->addr, kprobe_type, p->addr,
+                       (kprobe_gone(p) ? "[GONE]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1234,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void)
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist)
-                       arch_arm_kprobe(p);
+                       if (!kprobe_gone(p))
+                               arch_arm_kprobe(p);
        }
 
        kprobe_enabled = true;
@@ -1263,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void)
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
-                       if (!arch_trampoline_kprobe(p))
+                       if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
                                arch_disarm_kprobe(p);
                }
        }