Merge branch 'linus' into perfcounters/core

[safe/jmp/linux-2.6] / arch / x86 / kernel / step.c
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c

index b801e76..e8b9863 100644 (file)
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,91 +5,7 @@
  #include <linux/mm.h>
  #include <linux/ptrace.h>
  
-#ifdef CONFIG_X86_32
-#include <linux/uaccess.h>
-
-#include <asm/desc.h>
-
-/*
- * Return EIP plus the CS segment base.  The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
- *
- * The segment is checked, because it might have been changed by another
- * task between the original faulting instruction and here.
- *
- * If CS is no longer a valid code segment, or if EIP is beyond the
- * limit, or if it is a kernel address when CS is not a kernel segment,
- * then the returned value will be greater than *eip_limit.
- *
- * This is slow, but is very rarely executed.
- */
-unsigned long get_segment_eip(struct pt_regs *regs,
-                                           unsigned long *eip_limit)
-{
-       unsigned long ip = regs->ip;
-       unsigned seg = regs->cs & 0xffff;
-       u32 seg_ar, seg_limit, base, *desc;
-
-       /* Unlikely, but must come before segment checks. */
-       if (unlikely(regs->flags & VM_MASK)) {
-               base = seg << 4;
-               *eip_limit = base + 0xffff;
-               return base + (ip & 0xffff);
-       }
-
-       /* The standard kernel/user address space limit. */
-       *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
-
-       /* By far the most common cases. */
-       if (likely(SEGMENT_IS_FLAT_CODE(seg)))
-               return ip;
-
-       /* Check the segment exists, is within the current LDT/GDT size,
-          that kernel/user (ring 0..3) has the appropriate privilege,
-          that it's a code segment, and get the limit. */
-       __asm__("larl %3,%0; lsll %3,%1"
-                : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
-       if ((~seg_ar & 0x9800) || ip > seg_limit) {
-               *eip_limit = 0;
-               return 1;        /* So that returned ip > *eip_limit. */
-       }
-
-       /* Get the GDT/LDT descriptor base.
-          When you look for races in this code remember that
-          LDT and other horrors are only used in user space. */
-       if (seg & (1<<2)) {
-               /* Must lock the LDT while reading it. */
-               mutex_lock(&current->mm->context.lock);
-               desc = current->mm->context.ldt;
-               desc = (void *)desc + (seg & ~7);
-       } else {
-               /* Must disable preemption while reading the GDT. */
-               desc = (u32 *)get_cpu_gdt_table(get_cpu());
-               desc = (void *)desc + (seg & ~7);
-       }
-
-       /* Decode the code segment base from the descriptor */
-       base = get_desc_base((struct desc_struct *)desc);
-
-       if (seg & (1<<2))
-               mutex_unlock(&current->mm->context.lock);
-       else
-               put_cpu();
-
-       /* Adjust EIP and segment limit, and clamp at the kernel limit.
-          It's legitimate for segments to wrap at 0xffffffff. */
-       seg_limit += base;
-       if (seg_limit < *eip_limit && seg_limit >= base)
-               *eip_limit = seg_limit;
-       return ip + base;
-}
-#endif
-
-#ifdef CONFIG_X86_32
-static
-#endif
-unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs)
+unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
  {
         unsigned long addr, seg;
  
@@ -136,7 +52,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
  {
         int i, copied;
         unsigned char opcode[15];
-       unsigned long addr = convert_rip_to_linear(child, regs);
+       unsigned long addr = convert_ip_to_linear(child, regs);
  
         copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
         for (i = 0; i < copied; i++) {
@@ -189,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
  static int enable_single_step(struct task_struct *child)
  {
         struct pt_regs *regs = task_pt_regs(child);
+       unsigned long oflags;
+
+       /*
+        * If we stepped into a sysenter/syscall insn, it trapped in
+        * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+        * If user-mode had set TF itself, then it's still clear from
+        * do_debug() and we need to set it again to restore the user
+        * state so we don't wrongly set TIF_FORCED_TF below.
+        * If enable_single_step() was used last and that is what
+        * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
+        * already set and our bookkeeping is fine.
+        */
+       if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
+               regs->flags |= X86_EFLAGS_TF;
  
         /*
          * Always set TIF_SINGLESTEP - this guarantees that
@@ -197,11 +127,7 @@ static int enable_single_step(struct task_struct *child)
          */
         set_tsk_thread_flag(child, TIF_SINGLESTEP);
  
-       /*
-        * If TF was already set, don't do anything else
-        */
-       if (regs->flags & X86_EFLAGS_TF)
-               return 0;
+       oflags = regs->flags;
  
         /* Set TF on the kernel stack.. */
         regs->flags |= X86_EFLAGS_TF;
@@ -210,9 +136,22 @@ static int enable_single_step(struct task_struct *child)
          * ..but if TF is changed by the instruction we will trace,
          * don't mark it as being "us" that set it, so that we
          * won't clear it by hand later.
+        *
+        * Note that if we don't actually execute the popf because
+        * of a signal arriving right now or suchlike, we will lose
+        * track of the fact that it really was "us" that set it.
          */
-       if (is_setting_trap_flag(child, regs))
+       if (is_setting_trap_flag(child, regs)) {
+               clear_tsk_thread_flag(child, TIF_FORCED_TF);
                 return 0;
+       }
+
+       /*
+        * If TF was already set, check whether it was us who set it.
+        * If not, we should never attempt a block step.
+        */
+       if (oflags & X86_EFLAGS_TF)
+               return test_tsk_thread_flag(child, TIF_FORCED_TF);
  
         set_tsk_thread_flag(child, TIF_FORCED_TF);
  
@@ -224,12 +163,15 @@ static int enable_single_step(struct task_struct *child)
   */
  static void write_debugctlmsr(struct task_struct *child, unsigned long val)
  {
+       if (child->thread.debugctlmsr == val)
+               return;
+
         child->thread.debugctlmsr = val;
  
         if (child != current)
                 return;
  
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+       update_debugctlmsr(val);
  }
  
  /*
@@ -249,11 +191,11 @@ static void enable_step(struct task_struct *child, bool block)
                 write_debugctlmsr(child,
                                   child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
         } else {
-           write_debugctlmsr(child,
-                             child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+               write_debugctlmsr(child,
+                                 child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
  
-           if (!child->thread.debugctlmsr)
-                   clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+               if (!child->thread.debugctlmsr)
+                       clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
         }
  }
  
@@ -273,7 +215,7 @@ void user_disable_single_step(struct task_struct *child)
          * Make sure block stepping (BTF) is disabled.
          */
         write_debugctlmsr(child,
-                         child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
+                         child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
  
         if (!child->thread.debugctlmsr)
                 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);