[PATCH] Remove down_write() from taskstats code invoked on the exit() path
author Shailabh Nagar <nagar@watson.ibm.com>
Fri, 14 Jul 2006 07:24:47 +0000 (00:24 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Sat, 15 Jul 2006 04:53:57 +0000 (21:53 -0700)
In send_cpu_listeners(), which is called on the exit path, a down_write()
was protecting operations like skb_clone() and genlmsg_unicast() that do
GFP_KERNEL allocations.  If the oom-killer decides to kill tasks to satisfy
the allocations, the exit of those tasks could block on the same semaphore.

The down_write() was only needed to allow removal of invalid listeners from
the listener list.  The patch converts the down_write to a down_read and
defers the removal to a separate critical region.  This ensures that even
if the oom-killer is called, no other task's exit is blocked as it can
still acquire another down_read.

Thanks to Andrew Morton & Herbert Xu for pointing out the oom related
pitfalls, and to Chandra Seetharaman for suggesting this fix instead of
using something more complex like RCU.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
kernel/taskstats.c

index abb59e3..f45179c 100644 (file)
@@ -51,6 +51,7 @@ __read_mostly = {
 struct listener {
        struct list_head list;
        pid_t pid;
+       char valid;
 };
 
 struct listener_list {
@@ -127,7 +128,7 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
        struct listener *s, *tmp;
        struct sk_buff *skb_next, *skb_cur = skb;
        void *reply = genlmsg_data(genlhdr);
-       int rc, ret;
+       int rc, ret, delcount = 0;
 
        rc = genlmsg_end(skb, reply);
        if (rc < 0) {
@@ -137,7 +138,7 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
 
        rc = 0;
        listeners = &per_cpu(listener_array, cpu);
-       down_write(&listeners->sem);
+       down_read(&listeners->sem);
        list_for_each_entry_safe(s, tmp, &listeners->list, list) {
                skb_next = NULL;
                if (!list_is_last(&s->list, &listeners->list)) {
@@ -150,14 +151,26 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
                }
                ret = genlmsg_unicast(skb_cur, s->pid);
                if (ret == -ECONNREFUSED) {
-                       list_del(&s->list);
-                       kfree(s);
+                       s->valid = 0;
+                       delcount++;
                        rc = ret;
                }
                skb_cur = skb_next;
        }
-       up_write(&listeners->sem);
+       up_read(&listeners->sem);
+
+       if (!delcount)
+               return rc;
 
+       /* Delete invalidated entries */
+       down_write(&listeners->sem);
+       list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+               if (!s->valid) {
+                       list_del(&s->list);
+                       kfree(s);
+               }
+       }
+       up_write(&listeners->sem);
        return rc;
 }
 
@@ -290,6 +303,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
                                goto cleanup;
                        s->pid = pid;
                        INIT_LIST_HEAD(&s->list);
+                       s->valid = 1;
 
                        listeners = &per_cpu(listener_array, cpu);
                        down_write(&listeners->sem);