kmemleak: Fix scheduling-while-atomic bug
authorIngo Molnar <mingo@elte.hu>
Wed, 1 Jul 2009 07:43:53 +0000 (09:43 +0200)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Jul 2009 17:26:23 +0000 (10:26 -0700)
One of the kmemleak changes caused the following
scheduling-while-holding-the-tasklist-lock regression on x86:

BUG: sleeping function called from invalid context at mm/kmemleak.c:795
in_atomic(): 1, irqs_disabled(): 0, pid: 1737, name: kmemleak
2 locks held by kmemleak/1737:
 #0:  (scan_mutex){......}, at: [<c10c4376>] kmemleak_scan_thread+0x45/0x86
 #1:  (tasklist_lock){......}, at: [<c10c3bb4>] kmemleak_scan+0x1a9/0x39c
Pid: 1737, comm: kmemleak Not tainted 2.6.31-rc1-tip #59266
Call Trace:
 [<c105ac0f>] ? __debug_show_held_locks+0x1e/0x20
 [<c102e490>] __might_sleep+0x10a/0x111
 [<c10c38d5>] scan_yield+0x17/0x3b
 [<c10c3970>] scan_block+0x39/0xd4
 [<c10c3bc6>] kmemleak_scan+0x1bb/0x39c
 [<c10c4331>] ? kmemleak_scan_thread+0x0/0x86
 [<c10c437b>] kmemleak_scan_thread+0x4a/0x86
 [<c104d73e>] kthread+0x6e/0x73
 [<c104d6d0>] ? kthread+0x0/0x73
 [<c100959f>] kernel_thread_helper+0x7/0x10
kmemleak: 834 new suspected memory leaks (see /sys/kernel/debug/kmemleak)

The bit causing it is highly dubious:

static void scan_yield(void)
{
        might_sleep();

        if (time_is_before_eq_jiffies(next_scan_yield)) {
                schedule();
                next_scan_yield = jiffies + jiffies_scan_yield;
        }
}

It called deep inside the codepath and in a conditional way,
and that is what crapped up when one of the new scan_block()
uses grew a tasklist_lock dependency.

This minimal patch removes that yielding stuff and adds the
proper cond_resched().

The background scanning thread could probably also be reniced
to +10.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/kmemleak.c

index eeece2d..e766e1d 100644 (file)
 #define MAX_TRACE              16      /* stack trace length */
 #define REPORTS_NR             50      /* maximum number of reported leaks */
 #define MSECS_MIN_AGE          5000    /* minimum object age for reporting */
-#define MSECS_SCAN_YIELD       10      /* CPU yielding period */
 #define SECS_FIRST_SCAN                60      /* delay before the first scan */
 #define SECS_SCAN_WAIT         600     /* subsequent auto scanning delay */
 
@@ -186,10 +185,7 @@ static atomic_t kmemleak_error = ATOMIC_INIT(0);
 static unsigned long min_addr = ULONG_MAX;
 static unsigned long max_addr;
 
-/* used for yielding the CPU to other tasks during scanning */
-static unsigned long next_scan_yield;
 static struct task_struct *scan_thread;
-static unsigned long jiffies_scan_yield;
 /* used to avoid reporting of recently allocated objects */
 static unsigned long jiffies_min_age;
 static unsigned long jiffies_last_scan;
@@ -786,21 +782,6 @@ void kmemleak_no_scan(const void *ptr)
 EXPORT_SYMBOL(kmemleak_no_scan);
 
 /*
- * Yield the CPU so that other tasks get a chance to run.  The yielding is
- * rate-limited to avoid excessive number of calls to the schedule() function
- * during memory scanning.
- */
-static void scan_yield(void)
-{
-       might_sleep();
-
-       if (time_is_before_eq_jiffies(next_scan_yield)) {
-               schedule();
-               next_scan_yield = jiffies + jiffies_scan_yield;
-       }
-}
-
-/*
  * Memory scanning is a long process and it needs to be interruptable. This
  * function checks whether such interrupt condition occured.
  */
@@ -840,15 +821,6 @@ static void scan_block(void *_start, void *_end,
                if (scan_should_stop())
                        break;
 
-               /*
-                * When scanning a memory block with a corresponding
-                * kmemleak_object, the CPU yielding is handled in the calling
-                * code since it holds the object->lock to avoid the block
-                * freeing.
-                */
-               if (!scanned)
-                       scan_yield();
-
                object = find_and_get_object(pointer, 1);
                if (!object)
                        continue;
@@ -1014,7 +986,7 @@ static void kmemleak_scan(void)
         */
        object = list_entry(gray_list.next, typeof(*object), gray_list);
        while (&object->gray_list != &gray_list) {
-               scan_yield();
+               cond_resched();
 
                /* may add new objects to the list */
                if (!scan_should_stop())
@@ -1385,7 +1357,6 @@ void __init kmemleak_init(void)
        int i;
        unsigned long flags;
 
-       jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
        jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
        jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);