[PATCH] x86-64: robustify bad_dma_address handling
[safe/jmp/linux-2.6] / arch / x86_64 / kernel / kprobes.c
index 6cb40d1..209c8c0 100644 (file)
  *              Added function return probes functionality
  */
 
-#include <linux/config.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
-#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
+#include <linux/module.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
+#include <asm/uaccess.h>
 
-static DECLARE_MUTEX(kprobe_mutex);
 void jprobe_return_end(void);
+static void __kprobes arch_copy_kprobe(struct kprobe *p);
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -52,7 +52,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 /*
  * returns non-zero if opcode modifies the interrupt flag.
  */
-static inline int is_IF_modifier(kprobe_opcode_t *insn)
+static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
 {
        switch (*insn) {
        case 0xfa:              /* cli */
@@ -70,12 +70,11 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn)
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
        /* insn: must be on special executable page on x86_64. */
-       down(&kprobe_mutex);
        p->ainsn.insn = get_insn_slot();
-       up(&kprobe_mutex);
        if (!p->ainsn.insn) {
                return -ENOMEM;
        }
+       arch_copy_kprobe(p);
        return 0;
 }
 
@@ -84,7 +83,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
  * If it does, return the address of the 32-bit displacement word.
  * If not, return null.
  */
-static inline s32 *is_riprel(u8 *insn)
+static s32 __kprobes *is_riprel(u8 *insn)
 {
 #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)               \
        (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
@@ -182,7 +181,7 @@ static inline s32 *is_riprel(u8 *insn)
        return NULL;
 }
 
-void __kprobes arch_copy_kprobe(struct kprobe *p)
+static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
        s32 *ripdisp;
        memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
@@ -224,12 +223,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-       down(&kprobe_mutex);
-       free_insn_slot(p->ainsn.insn);
-       up(&kprobe_mutex);
+       mutex_lock(&kprobe_mutex);
+       free_insn_slot(p->ainsn.insn, 0);
+       mutex_unlock(&kprobe_mutex);
 }
 
-static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
        kcb->prev_kprobe.kp = kprobe_running();
        kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -237,7 +236,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
        kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
 }
 
-static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
        __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
        kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -245,7 +244,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
        kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
 }
 
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
                                struct kprobe_ctlblk *kcb)
 {
        __get_cpu_var(current_kprobe) = p;
@@ -266,48 +265,48 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
                regs->rip = (unsigned long)p->ainsn.insn;
 }
 
+/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
                                      struct pt_regs *regs)
 {
        unsigned long *sara = (unsigned long *)regs->rsp;
-        struct kretprobe_instance *ri;
+       struct kretprobe_instance *ri;
 
-        if ((ri = get_free_rp_inst(rp)) != NULL) {
-                ri->rp = rp;
-                ri->task = current;
+       if ((ri = get_free_rp_inst(rp)) != NULL) {
+               ri->rp = rp;
+               ri->task = current;
                ri->ret_addr = (kprobe_opcode_t *) *sara;
 
                /* Replace the return addr with trampoline addr */
                *sara = (unsigned long) &kretprobe_trampoline;
-
-                add_rp_inst(ri);
-        } else {
-                rp->nmissed++;
-        }
+               add_rp_inst(ri);
+       } else {
+               rp->nmissed++;
+       }
 }
 
-/*
- * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
- */
 int __kprobes kprobe_handler(struct pt_regs *regs)
 {
        struct kprobe *p;
        int ret = 0;
        kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t));
-       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       struct kprobe_ctlblk *kcb;
+
+       /*
+        * We don't want to be preempted for the entire
+        * duration of kprobe processing
+        */
+       preempt_disable();
+       kcb = get_kprobe_ctlblk();
 
        /* Check we're not actually recursing */
        if (kprobe_running()) {
-               /* We *are* holding lock here, so this is safe.
-                  Disarm the probe we just hit, and ignore it. */
                p = get_kprobe(addr);
                if (p) {
                        if (kcb->kprobe_status == KPROBE_HIT_SS &&
                                *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
                                regs->eflags &= ~TF_MASK;
                                regs->eflags |= kcb->kprobe_saved_rflags;
-                               unlock_kprobes();
                                goto no_kprobe;
                        } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
                                /* TODO: Provide re-entrancy from
@@ -329,25 +328,31 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
                                 */
                                save_previous_kprobe(kcb);
                                set_current_kprobe(p, regs, kcb);
-                               p->nmissed++;
+                               kprobes_inc_nmissed_count(p);
                                prepare_singlestep(p, regs);
                                kcb->kprobe_status = KPROBE_REENTER;
                                return 1;
                        }
                } else {
+                       if (*addr != BREAKPOINT_INSTRUCTION) {
+                       /* The breakpoint instruction was removed by
+                        * another cpu right after we hit, no further
+                        * handling of this interrupt is appropriate
+                        */
+                               regs->rip = (unsigned long)addr;
+                               ret = 1;
+                               goto no_kprobe;
+                       }
                        p = __get_cpu_var(current_kprobe);
                        if (p->break_handler && p->break_handler(p, regs)) {
                                goto ss_probe;
                        }
                }
-               /* If it's not ours, can't be delete race, (we hold lock). */
                goto no_kprobe;
        }
 
-       lock_kprobes();
        p = get_kprobe(addr);
        if (!p) {
-               unlock_kprobes();
                if (*addr != BREAKPOINT_INSTRUCTION) {
                        /*
                         * The breakpoint instruction was removed right
@@ -365,11 +370,6 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
                goto no_kprobe;
        }
 
-       /*
-        * This preempt_disable() matches the preempt_enable_no_resched()
-        * in post_kprobe_handler()
-        */
-       preempt_disable();
        set_current_kprobe(p, regs, kcb);
        kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 
@@ -383,6 +383,7 @@ ss_probe:
        return 1;
 
 no_kprobe:
+       preempt_enable_no_resched();
        return ret;
 }
 
@@ -403,13 +404,15 @@ no_kprobe:
  */
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-        struct kretprobe_instance *ri = NULL;
-        struct hlist_head *head;
-        struct hlist_node *node, *tmp;
-       unsigned long orig_ret_address = 0;
+       struct kretprobe_instance *ri = NULL;
+       struct hlist_head *head, empty_rp;
+       struct hlist_node *node, *tmp;
+       unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
-        head = kretprobe_inst_table_head(current);
+       INIT_HLIST_HEAD(&empty_rp);
+       spin_lock_irqsave(&kretprobe_lock, flags);
+       head = kretprobe_inst_table_head(current);
 
        /*
         * It is possible to have multiple instances associated with a given
@@ -420,20 +423,20 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         * We can handle this because:
         *     - instances are always inserted at the head of the list
         *     - when multiple return probes are registered for the same
-         *       function, the first instance's ret_addr will point to the
+        *       function, the first instance's ret_addr will point to the
         *       real return address, and all the rest will point to
         *       kretprobe_trampoline
         */
        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task != current)
+               if (ri->task != current)
                        /* another task is sharing our hash bucket */
-                        continue;
+                       continue;
 
                if (ri->rp && ri->rp->handler)
                        ri->rp->handler(ri, regs);
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address)
                        /*
@@ -448,15 +451,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        regs->rip = orig_ret_address;
 
        reset_current_kprobe();
-       unlock_kprobes();
+       spin_unlock_irqrestore(&kretprobe_lock, flags);
        preempt_enable_no_resched();
 
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we have handled unlocking
-        * and re-enabling preemption
-         */
-        return 1;
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
+       /*
+        * By returning a non-zero value, we are telling
+        * kprobe_handler() that we don't want the post_handler
+        * to run (and have re-enabled preemption)
+        */
+       return 1;
 }
 
 /*
@@ -510,13 +517,13 @@ static void __kprobes resume_execution(struct kprobe *p,
                *tos = orig_rip + (*tos - copy_rip);
                break;
        case 0xff:
-               if ((*insn & 0x30) == 0x10) {
+               if ((insn[1] & 0x30) == 0x10) {
                        /* call absolute, indirect */
                        /* Fix return addr; rip is correct. */
                        next_rip = regs->rip;
                        *tos = orig_rip + (*tos - copy_rip);
-               } else if (((*insn & 0x31) == 0x20) ||  /* jmp near, absolute indirect */
-                          ((*insn & 0x31) == 0x21)) {  /* jmp far, absolute indirect */
+               } else if (((insn[1] & 0x31) == 0x20) ||        /* jmp near, absolute indirect */
+                          ((insn[1] & 0x31) == 0x21)) {        /* jmp far, absolute indirect */
                        /* rip is correct. */
                        next_rip = regs->rip;
                }
@@ -536,10 +543,6 @@ static void __kprobes resume_execution(struct kprobe *p,
        }
 }
 
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.  And we hold kprobe lock.
- */
 int __kprobes post_kprobe_handler(struct pt_regs *regs)
 {
        struct kprobe *cur = kprobe_running();
@@ -560,8 +563,6 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
        if (kcb->kprobe_status == KPROBE_REENTER) {
                restore_previous_kprobe(kcb);
                goto out;
-       } else {
-               unlock_kprobes();
        }
        reset_current_kprobe();
 out:
@@ -578,22 +579,66 @@ out:
        return 1;
 }
 
-/* Interrupts disabled, kprobe_lock held. */
 int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
        struct kprobe *cur = kprobe_running();
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       const struct exception_table_entry *fixup;
 
-       if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-               return 1;
-
-       if (kcb->kprobe_status & KPROBE_HIT_SS) {
-               resume_execution(cur, regs, kcb);
+       switch(kcb->kprobe_status) {
+       case KPROBE_HIT_SS:
+       case KPROBE_REENTER:
+               /*
+                * We are here because the instruction being single
+                * stepped caused a page fault. We reset the current
+                * kprobe and the rip points back to the probe address
+                * and allow the page fault handler to continue as a
+                * normal page fault.
+                */
+               regs->rip = (unsigned long)cur->addr;
                regs->eflags |= kcb->kprobe_old_rflags;
-
-               reset_current_kprobe();
-               unlock_kprobes();
+               if (kcb->kprobe_status == KPROBE_REENTER)
+                       restore_previous_kprobe(kcb);
+               else
+                       reset_current_kprobe();
                preempt_enable_no_resched();
+               break;
+       case KPROBE_HIT_ACTIVE:
+       case KPROBE_HIT_SSDONE:
+               /*
+                * We increment the nmissed count for accounting,
+                * we can also use npre/npostfault count for accouting
+                * these specific fault cases.
+                */
+               kprobes_inc_nmissed_count(cur);
+
+               /*
+                * We come here because instructions in the pre/post
+                * handler caused the page_fault, this could happen
+                * if handler tries to access user space by
+                * copy_from_user(), get_user() etc. Let the
+                * user-specified handler try to fix it first.
+                */
+               if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+                       return 1;
+
+               /*
+                * In case the user-specified fault handler returned
+                * zero, try to fix up.
+                */
+               fixup = search_exception_tables(regs->rip);
+               if (fixup) {
+                       regs->rip = fixup->fixup;
+                       return 1;
+               }
+
+               /*
+                * fixup() could not handle it,
+                * Let do_page_fault() fix it.
+                */
+               break;
+       default:
+               break;
        }
        return 0;
 }
@@ -607,7 +652,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
        struct die_args *args = (struct die_args *)data;
        int ret = NOTIFY_DONE;
 
-       preempt_disable();
+       if (args->regs && user_mode(args->regs))
+               return ret;
+
        switch (val) {
        case DIE_INT3:
                if (kprobe_handler(args->regs))
@@ -619,14 +666,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
                break;
        case DIE_GPF:
        case DIE_PAGE_FAULT:
+               /* kprobe_running() needs smp_processor_id() */
+               preempt_disable();
                if (kprobe_running() &&
                    kprobe_fault_handler(args->regs, args->trapnr))
                        ret = NOTIFY_STOP;
+               preempt_enable();
                break;
        default:
                break;
        }
-       preempt_enable();
        return ret;
 }
 
@@ -688,6 +737,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
                *regs = kcb->jprobe_saved_regs;
                memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
                       MIN_STACK_SIZE(stack_addr));
+               preempt_enable_no_resched();
                return 1;
        }
        return 0;