sctp: Fix a race between ICMP protocol unreachable and connect()
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 812ff17..ea21924 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -53,6 +53,7 @@
 #include <linux/socket.h>
 #include <linux/ip.h>
 #include <linux/time.h> /* For struct timeval */
+#include <linux/slab.h>
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/snmp.h>
@@ -61,6 +62,7 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
+#include <net/net_namespace.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
@@ -74,22 +76,23 @@ static struct sctp_association *__sctp_lookup_association(
                                        const union sctp_addr *peer,
                                        struct sctp_transport **pt);
 
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
 
 
 /* Calculate the SCTP checksum of an SCTP packet.  */
 static inline int sctp_rcv_checksum(struct sk_buff *skb)
 {
-       struct sk_buff *list = skb_shinfo(skb)->frag_list;
        struct sctphdr *sh = sctp_hdr(skb);
-       __u32 cmp = ntohl(sh->checksum);
-       __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+       __le32 cmp = sh->checksum;
+       struct sk_buff *list;
+       __le32 val;
+       __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
 
-       for (; list; list = list->next)
-               val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
-                                       val);
+       skb_walk_frags(skb, list)
+               tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
+                                       tmp);
 
-       val = sctp_end_cksum(val);
+       val = sctp_end_cksum(tmp);
 
        if (val != cmp) {
                /* CRC failure, dump it. */
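The loop rewrite above is behavior-preserving: skb_walk_frags() is the canonical helper for walking a GSO frag_list, and the __le32/__u32 split makes the final compare endian-clean (both cmp and val are __le32, instead of mixing an ntohl()-converted value with a host-order accumulator). For reference, the macro from <linux/skbuff.h> expands to exactly the walk the removed lines did by hand:

	/* From <linux/skbuff.h>: what the new iteration expands to. */
	#define skb_walk_frags(skb, iter) \
		for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)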
@@ -141,7 +144,8 @@ int sctp_rcv(struct sk_buff *skb)
        __skb_pull(skb, skb_transport_offset(skb));
        if (skb->len < sizeof(struct sctphdr))
                goto discard_it;
-       if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
+       if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
+                 sctp_rcv_checksum(skb) < 0)
                goto discard_it;
 
        skb_pull(skb, sizeof(struct sctphdr));
@@ -248,9 +252,27 @@ int sctp_rcv(struct sk_buff *skb)
         */
        sctp_bh_lock_sock(sk);
 
+       if (sk != rcvr->sk) {
+               /* Our cached sk is different from the rcvr->sk.  This is
+                * because migrate()/accept() may have moved the association
+                * to a new socket and released all the sockets.  So now we
+                * are holding a lock on the old socket while the user may
+                * be doing something with the new socket.  Switch our view
+                * of the current sk.
+                */
+               sctp_bh_unlock_sock(sk);
+               sk = rcvr->sk;
+               sctp_bh_lock_sock(sk);
+       }
+
        if (sock_owned_by_user(sk)) {
+               if (sctp_add_backlog(sk, skb)) {
+                       sctp_bh_unlock_sock(sk);
+                       sctp_chunk_free(chunk);
+                       skb = NULL; /* sctp_chunk_free already freed the skb */
+                       goto discard_release;
+               }
                SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
-               sctp_add_backlog(sk, skb);
        } else {
                SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
                sctp_inq_push(&chunk->rcvr->inqueue, chunk);
@@ -320,8 +342,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
                sctp_bh_lock_sock(sk);
 
                if (sock_owned_by_user(sk)) {
-                       sk_add_backlog(sk, skb);
-                       backloged = 1;
+                       if (sk_add_backlog(sk, skb))
+                               sctp_chunk_free(chunk);
+                       else
+                               backloged = 1;
                } else
                        sctp_inq_push(inqueue, chunk);
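Both SCTP call sites — sctp_add_backlog() in sctp_rcv() above and the direct sk_add_backlog() call here — now have to check for failure, because the generic socket backlog became bounded: once the queued bytes exceed the receive-buffer limit, sk_add_backlog() refuses the skb and the caller must free the chunk itself. A rough sketch of the helper in include/net/sock.h from this period (the exact limit test shifted between releases, so treat the details as approximate):

	/* Sketch of the bounded backlog helper; caller holds the socket
	 * spinlock.  On failure the caller owns (and must free) the skb. */
	static inline __must_check int sk_add_backlog(struct sock *sk,
						      struct sk_buff *skb)
	{
		if (sk->sk_backlog.len >= max(sk->sk_backlog.limit,
					      sk->sk_rcvbuf << 1))
			return -ENOBUFS;

		__sk_add_backlog(sk, skb);	/* link onto sk_backlog queue */
		sk->sk_backlog.len += skb->truesize;
		return 0;
	}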
 
@@ -346,29 +370,34 @@ done:
        return 0;
 }
 
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
        struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
        struct sctp_ep_common *rcvr = chunk->rcvr;
+       int ret;
 
-       /* Hold the assoc/ep while hanging on the backlog queue.
-        * This way, we know structures we need will not disappear from us
-        */
-       if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
-               sctp_association_hold(sctp_assoc(rcvr));
-       else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
-               sctp_endpoint_hold(sctp_ep(rcvr));
-       else
-               BUG();
+       ret = sk_add_backlog(sk, skb);
+       if (!ret) {
+               /* Hold the assoc/ep while hanging on the backlog queue.
+                * This way, we know structures we need will not disappear
+                * from us
+                */
+               if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
+                       sctp_association_hold(sctp_assoc(rcvr));
+               else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
+                       sctp_endpoint_hold(sctp_ep(rcvr));
+               else
+                       BUG();
+       }
+       return ret;
 
-       sk_add_backlog(sk, skb);
 }
 
 /* Handle icmp frag needed error. */
 void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
                           struct sctp_transport *t, __u32 pmtu)
 {
-       if (!t || (t->pathmtu == pmtu))
+       if (!t || (t->pathmtu <= pmtu))
                return;
 
        if (sock_owned_by_user(sk)) {
@@ -409,13 +438,27 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
                           struct sctp_association *asoc,
                           struct sctp_transport *t)
 {
-       SCTP_DEBUG_PRINTK("%s\n",  __FUNCTION__);
+       SCTP_DEBUG_PRINTK("%s\n",  __func__);
 
-       sctp_do_sm(SCTP_EVENT_T_OTHER,
-                  SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
-                  asoc->state, asoc->ep, asoc, t,
-                  GFP_ATOMIC);
+       if (sock_owned_by_user(sk)) {
+               if (timer_pending(&t->proto_unreach_timer))
+                       return;
+
+               if (!mod_timer(&t->proto_unreach_timer,
+                              jiffies + (HZ/20)))
+                       sctp_association_hold(asoc);
+       } else {
+               if (timer_pending(&t->proto_unreach_timer) &&
+                   del_timer(&t->proto_unreach_timer))
+                       sctp_association_put(asoc);
 
+               sctp_do_sm(SCTP_EVENT_T_OTHER,
+                          SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
+                          asoc->state, asoc->ep, asoc, t,
+                          GFP_ATOMIC);
+       }
 }
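This hunk is the race fix named in the commit title. Previously, an ICMP protocol-unreachable could drive the state machine into destroying the association while a user process held the socket lock in connect() and still had a reference on it. Now the work is deferred through t->proto_unreach_timer (retried every HZ/20, i.e. 50 ms) whenever the socket is user-owned. The companion timer handler added to sm_sideeffects.c by the same patch looks roughly like this (reconstructed; treat it as a sketch rather than the verbatim upstream code):

	void sctp_generate_proto_unreach_event(unsigned long data)
	{
		struct sctp_transport *transport = (struct sctp_transport *) data;
		struct sctp_association *asoc = transport->asoc;

		sctp_bh_lock_sock(asoc->base.sk);
		if (sock_owned_by_user(asoc->base.sk)) {
			/* Socket still locked by the user: try again later,
			 * keeping the reference taken when the timer was armed. */
			if (!mod_timer(&transport->proto_unreach_timer,
				       jiffies + (HZ/20)))
				sctp_association_hold(asoc);
			goto out_unlock;
		}

		sctp_do_sm(SCTP_EVENT_T_OTHER,
			   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
			   asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);

	out_unlock:
		sctp_bh_unlock_sock(asoc->base.sk);
		sctp_association_put(asoc);
	}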
 
 /* Common lookup code for icmp/icmpv6 error handler. */
@@ -430,6 +473,9 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
        struct sock *sk = NULL;
        struct sctp_association *asoc;
        struct sctp_transport *transport = NULL;
+       struct sctp_init_chunk *chunkhdr;
+       __u32 vtag = ntohl(sctphdr->vtag);
+       int len = skb->len - ((void *)sctphdr - (void *)skb->data);
 
        *app = NULL; *tpp = NULL;
 
@@ -451,8 +497,28 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
 
        sk = asoc->base.sk;
 
-       if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) {
-               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+       /* RFC 4960, Appendix C. ICMP Handling
+        *
+        * ICMP6) An implementation MUST validate that the Verification Tag
+        * contained in the ICMP message matches the Verification Tag of
+        * the peer.  If the Verification Tag is not 0 and does NOT
+        * match, discard the ICMP message.  If it is 0 and the ICMP
+        * message contains enough bytes to verify that the chunk type is
+        * an INIT chunk and that the Initiate Tag matches the tag of the
+        * peer, continue with ICMP7.  If the ICMP message is too short
+        * or the chunk type or the Initiate Tag does not match, silently
+        * discard the packet.
+        */
+       if (vtag == 0) {
+               chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr
+                               + sizeof(struct sctphdr));
+               if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
+                         + sizeof(__be32) ||
+                   chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
+                   ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+                       goto out;
+               }
+       } else if (vtag != asoc->c.peer_vtag) {
                goto out;
        }
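The length test in the vtag == 0 branch works because an INIT chunk's Initiate Tag sits immediately after the common chunk header, so sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + sizeof(__be32) bytes are enough to read both chunk_hdr.type and init_hdr.init_tag. The wire layout it relies on, from <linux/sctp.h> (abridged):

	typedef struct sctp_chunkhdr {
		__u8 type;		/* SCTP_CID_INIT for an INIT chunk */
		__u8 flags;
		__be16 length;
	} __attribute__((packed)) sctp_chunkhdr_t;

	typedef struct sctp_inithdr {
		__be32 init_tag;	/* the Initiate Tag checked above */
		/* ... a_rwnd, stream counts, initial TSN, params ... */
	} __attribute__((packed)) sctp_inithdr_t;

	struct sctp_init_chunk {
		sctp_chunkhdr_t chunk_hdr;
		sctp_inithdr_t init_hdr;
	} __attribute__((packed));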
 
@@ -462,7 +528,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
         * servers this needs to be solved differently.
         */
        if (sock_owned_by_user(sk))
-               NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+               NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS);
 
        *app = asoc;
        *tpp = transport;
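The extra &init_net arguments here (and in the ICMP_INC_STATS_BH calls below) follow the conversion of the SNMP MIBs to per-network-namespace storage; the macros now take a struct net. Since SCTP itself was not namespace-aware yet at this point, the initial namespace is hard-coded. Roughly, from <net/ip.h> and <net/icmp.h>:

	/* Per-netns MIB macros (approximate; field names from this era): */
	#define NET_INC_STATS_BH(net, field) \
		SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
	#define ICMP_INC_STATS_BH(net, field) \
		SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)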
@@ -511,7 +577,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
        int err;
 
        if (skb->len < ihlen + 8) {
-               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
                return;
        }
 
@@ -525,7 +591,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
        skb->network_header = saveip;
        skb->transport_header = savesctp;
        if (!sk) {
-               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
                return;
        }
        /* Warning:  The sock lock is held.  Remember to call
@@ -725,7 +791,6 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
        }
 
        ep = sctp_sk((sctp_get_ctl_sock()))->ep;
-       epb = &ep->base;
 
 hit:
        sctp_endpoint_hold(ep);