Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / net / core / sock.c
index fcd397a..37fe9b6 100644 (file)
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
        struct timeval tv;
@@ -307,6 +313,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         */
        skb_len = skb->len;
 
+       /* we escape from rcu protected region, make sure we dont leak
+        * a norefcounted dst
+        */
+       skb_dst_force(skb);
+
        spin_lock_irqsave(&list->lock, flags);
        skb->dropcount = atomic_read(&sk->sk_drops);
        __skb_queue_tail(list, skb);
@@ -327,6 +338,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
        skb->dev = NULL;
 
+       if (sk_rcvqueues_full(sk, skb)) {
+               atomic_inc(&sk->sk_drops);
+               goto discard_and_relse;
+       }
        if (nested)
                bh_lock_sock_nested(sk);
        else
@@ -340,8 +355,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
                rc = sk_backlog_rcv(sk, skb);
 
                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-       } else
-               sk_add_backlog(sk, skb);
+       } else if (sk_add_backlog(sk, skb)) {
+               bh_unlock_sock(sk);
+               atomic_inc(&sk->sk_drops);
+               goto discard_and_relse;
+       }
+
        bh_unlock_sock(sk);
 out:
        sock_put(sk);
@@ -360,11 +379,11 @@ EXPORT_SYMBOL(sk_reset_txq);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-       struct dst_entry *dst = sk->sk_dst_cache;
+       struct dst_entry *dst = __sk_dst_get(sk);
 
        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_tx_queue_clear(sk);
-               sk->sk_dst_cache = NULL;
+               rcu_assign_pointer(sk->sk_dst_cache, NULL);
                dst_release(dst);
                return NULL;
        }
@@ -1037,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
        module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+       u32 classid = task_cls_classid(current);
+
+       if (classid && classid != sk->sk_classid)
+               sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
 /**
  *     sk_alloc - All socket objects are allocated here
  *     @net: the applicable net namespace
@@ -1060,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                sock_lock_init(sk);
                sock_net_set(sk, get_net(net));
                atomic_set(&sk->sk_wmem_alloc, 1);
+
+               sock_update_classid(sk);
        }
 
        return sk;
@@ -1139,6 +1171,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
+               newsk->sk_backlog.len = 0;
 
                atomic_set(&newsk->sk_rmem_alloc, 0);
                /*
@@ -1152,7 +1185,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-               rwlock_init(&newsk->sk_dst_lock);
+               spin_lock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class_and_name(&newsk->sk_callback_lock,
                                af_callback_keys + newsk->sk_family,
@@ -1202,7 +1235,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                 */
                sk_refcnt_debug_inc(newsk);
                sk_set_socket(newsk, NULL);
-               newsk->sk_sleep  = NULL;
+               newsk->sk_wq = NULL;
 
                if (newsk->sk_prot->sockets_allocated)
                        percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1222,6 +1255,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+       sk->sk_route_caps &= ~sk->sk_route_nocaps;
        if (sk_can_gso(sk)) {
                if (dst->header_len) {
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1390,7 +1424,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-               prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1399,7 +1433,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                        break;
                timeo = schedule_timeout(timeo);
        }
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return timeo;
 }
 
@@ -1526,6 +1560,7 @@ static void __release_sock(struct sock *sk)
                do {
                        struct sk_buff *next = skb->next;
 
+                       WARN_ON_ONCE(skb_dst_is_noref(skb));
                        skb->next = NULL;
                        sk_backlog_rcv(sk, skb);
 
@@ -1542,6 +1577,12 @@ static void __release_sock(struct sock *sk)
 
                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
+
+       /*
+        * Doing the zeroing here guarantee we can not loop forever
+        * while a wild producer attempts to flood us.
+        */
+       sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1559,11 +1600,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
        int rc;
        DEFINE_WAIT(wait);
 
-       prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
@@ -1785,41 +1826,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk_has_sleeper(sk))
-               wake_up_interruptible_all(sk->sk_sleep);
-       read_unlock(&sk->sk_callback_lock);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_all(&wq->wait);
+       rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk_has_sleeper(sk))
-               wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_poll(&wq->wait, POLLERR);
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk_has_sleeper(sk))
-               wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
                                                POLLRDNORM | POLLRDBAND);
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
 
        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-               if (sk_has_sleeper(sk))
-                       wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+               wq = rcu_dereference(sk->sk_wq);
+               if (wq_has_sleeper(wq))
+                       wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
                                                POLLWRNORM | POLLWRBAND);
 
                /* Should agree with poll, otherwise some programs break */
@@ -1827,7 +1880,7 @@ static void sock_def_write_space(struct sock *sk)
                        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
 
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1881,12 +1934,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
        if (sock) {
                sk->sk_type     =       sock->type;
-               sk->sk_sleep    =       &sock->wait;
+               sk->sk_wq       =       sock->wq;
                sock->sk        =       sk;
        } else
-               sk->sk_sleep    =       NULL;
+               sk->sk_wq       =       NULL;
 
-       rwlock_init(&sk->sk_dst_lock);
+       spin_lock_init(&sk->sk_dst_lock);
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class_and_name(&sk->sk_callback_lock,
                        af_callback_keys + sk->sk_family,
@@ -2276,7 +2329,8 @@ out_free_request_sock_slab:
                prot->rsk_prot->slab = NULL;
        }
 out_free_request_sock_slab_name:
-       kfree(prot->rsk_prot->slab_name);
+       if (prot->rsk_prot)
+               kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;