[UDP]: Add memory accounting.
[safe/jmp/linux-2.6] / net / ipv4 / udp.c
index 113e0c4..3532843 100644 (file)
@@ -70,6 +70,7 @@
  *     Alexey Kuznetsov:               allow both IPv4 and IPv6 sockets to bind
  *                                     a single port at the same time.
  *     Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
+ *     James Chapman           :       Add L2TP encapsulation type.
  *
  *
  *             This program is free software; you can redistribute it and/or
@@ -81,6 +82,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <linux/bootmem.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/module.h>
@@ -97,6 +99,7 @@
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/checksum.h>
  */
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
+EXPORT_SYMBOL(udp_statistics);
+
+DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
+EXPORT_SYMBOL(udp_stats_in6);
 
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
-static int udp_port_rover;
+int sysctl_udp_mem[3] __read_mostly;
+int sysctl_udp_rmem_min __read_mostly;
+int sysctl_udp_wmem_min __read_mostly;
 
-/*
- * Note about this hash function :
- * Typical use is probably daddr = 0, only dport is going to vary hash
- */
-static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
-{
-       addr ^= addr >> 16;
-       addr ^= addr >> 8;
-       return port ^ addr;
-}
+EXPORT_SYMBOL(sysctl_udp_mem);
+EXPORT_SYMBOL(sysctl_udp_rmem_min);
+EXPORT_SYMBOL(sysctl_udp_wmem_min);
+
+atomic_t udp_memory_allocated;
+EXPORT_SYMBOL(udp_memory_allocated);
 
-static inline int __udp_lib_port_inuse(unsigned int hash, int port,
-       __be32 daddr, struct hlist_head udptable[])
+static inline int __udp_lib_lport_inuse(__u16 num,
+                                       const struct hlist_head udptable[])
 {
        struct sock *sk;
        struct hlist_node *node;
-       struct inet_sock *inet;
 
-       sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
-               if (sk->sk_hash != hash)
-                       continue;
-               inet = inet_sk(sk);
-               if (inet->num != port)
-                       continue;
-               if (inet->rcv_saddr == daddr)
+       sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+               if (sk->sk_hash == num) /* sk_hash is the local port set by __udp_lib_get_port() */
                        return 1;
-       }
        return 0;
 }
 
@@ -150,112 +148,87 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port,
  *  @sk:          socket struct in question
  *  @snum:        port number to look up
  *  @udptable:    hash list table, must be of UDP_HTABLE_SIZE
- *  @port_rover:  pointer to record of last unallocated port
  *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
  */
 int __udp_lib_get_port(struct sock *sk, unsigned short snum,
-                      struct hlist_head udptable[], int *port_rover,
+                      struct hlist_head udptable[],
                       int (*saddr_comp)(const struct sock *sk1,
                                         const struct sock *sk2 )    )
 {
        struct hlist_node *node;
        struct hlist_head *head;
        struct sock *sk2;
-       unsigned int hash;
        int    error = 1;
 
        write_lock_bh(&udp_hash_lock);
-       if (snum == 0) {
-               int best_size_so_far, best, result, i;
-
-               if (*port_rover > sysctl_local_port_range[1] ||
-                   *port_rover < sysctl_local_port_range[0])
-                       *port_rover = sysctl_local_port_range[0];
-               best_size_so_far = 32767;
-               best = result = *port_rover;
-               for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-                       int size;
-
-                       hash = hash_port_and_addr(result,
-                                       inet_sk(sk)->rcv_saddr);
-                       head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
-                       if (hlist_empty(head)) {
-                               if (result > sysctl_local_port_range[1])
-                                       result = sysctl_local_port_range[0] +
-                                               ((result - sysctl_local_port_range[0]) &
-                                                (UDP_HTABLE_SIZE - 1));
+
+       if (!snum) {
+               int i, low, high, remaining;
+               unsigned rover, best, best_size_so_far;
+
+               inet_get_local_port_range(&low, &high);
+               remaining = (high - low) + 1;
+
+               best_size_so_far = UINT_MAX;
+               best = rover = net_random() % remaining + low;
+
+               /* 1st pass: look for empty (or shortest) hash chain */
+               for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+                       int size = 0;
+
+                       head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
+                       if (hlist_empty(head))
                                goto gotit;
-                       }
-                       size = 0;
+
                        sk_for_each(sk2, node, head) {
                                if (++size >= best_size_so_far)
                                        goto next;
                        }
                        best_size_so_far = size;
-                       best = result;
+                       best = rover;
                next:
-                       ;
+                       /* fold back if end of range */
+                       if (++rover > high)
+                               rover = low + ((rover - low)
+                                              & (UDP_HTABLE_SIZE - 1));
+
+
                }
-               result = best;
-               for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
-                    i++, result += UDP_HTABLE_SIZE) {
-                       if (result > sysctl_local_port_range[1])
-                               result = sysctl_local_port_range[0]
-                                       + ((result - sysctl_local_port_range[0]) &
-                                          (UDP_HTABLE_SIZE - 1));
-                       hash = hash_port_and_addr(result, 0);
-                       if (__udp_lib_port_inuse(hash, result,
-                                                0, udptable))
-                               continue;
-                       if (!inet_sk(sk)->rcv_saddr)
-                               break;
 
-                       hash = hash_port_and_addr(result,
-                                       inet_sk(sk)->rcv_saddr);
-                       if (! __udp_lib_port_inuse(hash, result,
-                               inet_sk(sk)->rcv_saddr, udptable))
-                               break;
+               /* 2nd pass: find hole in shortest hash chain */
+               rover = best;
+               for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
+                       if (! __udp_lib_lport_inuse(rover, udptable))
+                               goto gotit;
+                       rover += UDP_HTABLE_SIZE;
+                       if (rover > high)
+                               rover = low + ((rover - low)
+                                              & (UDP_HTABLE_SIZE - 1));
                }
-               if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-                       goto fail;
+
+
+               /* All ports in use! */
+               goto fail;
+
 gotit:
-               *port_rover = snum = result;
+               snum = rover;
        } else {
-               hash = hash_port_and_addr(snum, 0);
-               head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+               head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 
                sk_for_each(sk2, node, head)
-                       if (sk2->sk_hash == hash &&
-                           sk2 != sk &&
-                           inet_sk(sk2)->num == snum &&
-                           (!sk2->sk_reuse || !sk->sk_reuse) &&
-                           (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
-                            sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-                           (*saddr_comp)(sk, sk2))
+                       if (sk2->sk_hash == snum                             &&
+                           sk2 != sk                                        &&
+                           (!sk2->sk_reuse        || !sk->sk_reuse)         &&
+                           (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+                            || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+                           (*saddr_comp)(sk, sk2)                             )
                                goto fail;
-
-               if (inet_sk(sk)->rcv_saddr) {
-                       hash = hash_port_and_addr(snum,
-                                                 inet_sk(sk)->rcv_saddr);
-                       head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
-
-                       sk_for_each(sk2, node, head)
-                               if (sk2->sk_hash == hash &&
-                                   sk2 != sk &&
-                                   inet_sk(sk2)->num == snum &&
-                                   (!sk2->sk_reuse || !sk->sk_reuse) &&
-                                   (!sk2->sk_bound_dev_if ||
-                                    !sk->sk_bound_dev_if ||
-                                    sk2->sk_bound_dev_if ==
-                                    sk->sk_bound_dev_if) &&
-                                   (*saddr_comp)(sk, sk2))
-                                       goto fail;
-               }
        }
+
        inet_sk(sk)->num = snum;
-       sk->sk_hash = hash;
+       sk->sk_hash = snum;
        if (sk_unhashed(sk)) {
-               head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+               head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
                sk_add_node(sk, head);
                sock_prot_inc_use(sk->sk_prot);
        }
@@ -268,7 +241,7 @@ fail:
 int udp_get_port(struct sock *sk, unsigned short snum,
                        int (*scmp)(const struct sock *, const struct sock *))
 {
-       return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
+       return  __udp_lib_get_port(sk, snum, udp_hash, scmp); /* snum 0: a free port is picked; sk is hashed into udp_hash */
 }
 
 int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
@@ -294,77 +267,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
        struct sock *sk, *result = NULL;
        struct hlist_node *node;
-       unsigned int hash, hashwild;
-       int score, best = -1, hport = ntohs(dport);
-
-       hash = hash_port_and_addr(hport, daddr);
-       hashwild = hash_port_and_addr(hport, 0);
+       unsigned short hnum = ntohs(dport);
+       int badness = -1;
 
        read_lock(&udp_hash_lock);
-
-lookup:
-
-       sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+       sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
                struct inet_sock *inet = inet_sk(sk);
 
-               if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
-                       inet->num != hport)
-                       continue;
-
-               score = (sk->sk_family == PF_INET ? 1 : 0);
-               if (inet->rcv_saddr) {
-                       if (inet->rcv_saddr != daddr)
-                               continue;
-                       score+=2;
-               }
-               if (inet->daddr) {
-                       if (inet->daddr != saddr)
-                               continue;
-                       score+=2;
-               }
-               if (inet->dport) {
-                       if (inet->dport != sport)
-                               continue;
-                       score+=2;
-               }
-               if (sk->sk_bound_dev_if) {
-                       if (sk->sk_bound_dev_if != dif)
-                               continue;
-                       score+=2;
-               }
-               if (score == 9) {
-                       result = sk;
-                       goto found;
-               } else if (score > best) {
-                       result = sk;
-                       best = score;
+               if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+                       int score = (sk->sk_family == PF_INET ? 1 : 0);
+                       if (inet->rcv_saddr) {
+                               if (inet->rcv_saddr != daddr)
+                                       continue;
+                               score+=2;
+                       }
+                       if (inet->daddr) {
+                               if (inet->daddr != saddr)
+                                       continue;
+                               score+=2;
+                       }
+                       if (inet->dport) {
+                               if (inet->dport != sport)
+                                       continue;
+                               score+=2;
+                       }
+                       if (sk->sk_bound_dev_if) {
+                               if (sk->sk_bound_dev_if != dif)
+                                       continue;
+                               score+=2;
+                       }
+                       if (score == 9) {
+                               result = sk;
+                               break;
+                       } else if (score > badness) {
+                               result = sk;
+                               badness = score;
+                       }
                }
        }
-
-       if (hash != hashwild) {
-               hash = hashwild;
-               goto lookup;
-       }
-found:
        if (result)
                sock_hold(result);
        read_unlock(&udp_hash_lock);
        return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
-                                            int hport, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+                                            __be16 loc_port, __be32 loc_addr,
                                             __be16 rmt_port, __be32 rmt_addr,
                                             int dif)
 {
        struct hlist_node *node;
        struct sock *s = sk;
+       unsigned short hnum = ntohs(loc_port);
 
        sk_for_each_from(s, node) {
                struct inet_sock *inet = inet_sk(s);
 
                if (s->sk_hash != hnum                                  ||
-                   inet->num != hport                                  ||
                    (inet->daddr && inet->daddr != rmt_addr)            ||
                    (inet->dport != rmt_port && inet->dport)            ||
                    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
@@ -528,6 +487,7 @@ static int udp_push_pending_frames(struct sock *sk)
        struct sk_buff *skb;
        struct udphdr *uh;
        int err = 0;
+       int is_udplite = IS_UDPLITE(sk);
        __wsum csum = 0;
 
        /* Grab the skbuff where UDP header space exists. */
@@ -543,7 +503,7 @@ static int udp_push_pending_frames(struct sock *sk)
        uh->len = htons(up->len);
        uh->check = 0;
 
-       if (up->pcflag)                                  /*     UDP-Lite      */
+       if (is_udplite)                                  /*     UDP-Lite      */
                csum  = udplite_csum_outgoing(sk, skb);
 
        else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
@@ -570,6 +530,8 @@ send:
 out:
        up->len = 0;
        up->pending = 0;
+       if (!err)
+               UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
        return err;
 }
 
@@ -586,7 +548,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        __be32 daddr, faddr, saddr;
        __be16 dport;
        u8  tos;
-       int err, is_udplite = up->pcflag;
+       int err, is_udplite = IS_UDPLITE(sk);
        int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 
@@ -676,7 +638,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                connected = 0;
        }
 
-       if (MULTICAST(daddr)) {
+       if (ipv4_is_multicast(daddr)) {
                if (!ipc.oif)
                        ipc.oif = inet->mc_index;
                if (!saddr)
@@ -699,8 +661,11 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                                                 .dport = dport } } };
                security_sk_classify_flow(sk, &fl);
                err = ip_route_output_flow(&rt, &fl, sk, 1);
-               if (err)
+               if (err) {
+                       if (err == -ENETUNREACH)
+                               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
                        goto out;
+               }
 
                err = -EACCES;
                if ((rt->rt_flags & RTCF_BROADCAST) &&
@@ -755,10 +720,8 @@ out:
        ip_rt_put(rt);
        if (free)
                kfree(ipc.opt);
-       if (!err) {
-               UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+       if (!err)
                return len;
-       }
        /*
         * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
         * ENOBUFS might not be good (it's not tunable per se), but otherwise
@@ -879,6 +842,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
        struct sk_buff *skb;
        unsigned int ulen, copied;
+       int peeked;
        int err;
        int is_udplite = IS_UDPLITE(sk);
 
@@ -892,7 +856,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                return ip_recv_error(sk, msg, len);
 
 try_again:
-       skb = skb_recv_datagram(sk, flags, noblock, &err);
+       skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+                                 &peeked, &err);
        if (!skb)
                goto out;
 
@@ -927,6 +892,9 @@ try_again:
        if (err)
                goto out_free;
 
+       if (!peeked)
+               UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+
        sock_recv_timestamp(msg, sk, skb);
 
        /* Copy the address. */
@@ -945,14 +913,17 @@ try_again:
                err = ulen;
 
 out_free:
+       lock_sock(sk);
        skb_free_datagram(sk, skb);
+       release_sock(sk);
 out:
        return err;
 
 csum_copy_err:
-       UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
-
-       skb_kill_datagram(sk, skb, flags);
+       lock_sock(sk);
+       if (!skb_kill_datagram(sk, skb, flags))
+               UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+       release_sock(sk);
 
        if (noblock)
                return -EAGAIN;
@@ -982,104 +953,6 @@ int udp_disconnect(struct sock *sk, int flags)
        return 0;
 }
 
-/* return:
- *     1  if the the UDP system should process it
- *     0  if we should drop this packet
- *     -1 if it should get processed by xfrm4_rcv_encap
- */
-static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
-{
-#ifndef CONFIG_XFRM
-       return 1;
-#else
-       struct udp_sock *up = udp_sk(sk);
-       struct udphdr *uh;
-       struct iphdr *iph;
-       int iphlen, len;
-
-       __u8 *udpdata;
-       __be32 *udpdata32;
-       __u16 encap_type = up->encap_type;
-
-       /* if we're overly short, let UDP handle it */
-       len = skb->len - sizeof(struct udphdr);
-       if (len <= 0)
-               return 1;
-
-       /* if this is not encapsulated socket, then just return now */
-       if (!encap_type)
-               return 1;
-
-       /* If this is a paged skb, make sure we pull up
-        * whatever data we need to look at. */
-       if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
-               return 1;
-
-       /* Now we can get the pointers */
-       uh = udp_hdr(skb);
-       udpdata = (__u8 *)uh + sizeof(struct udphdr);
-       udpdata32 = (__be32 *)udpdata;
-
-       switch (encap_type) {
-       default:
-       case UDP_ENCAP_ESPINUDP:
-               /* Check if this is a keepalive packet.  If so, eat it. */
-               if (len == 1 && udpdata[0] == 0xff) {
-                       return 0;
-               } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
-                       /* ESP Packet without Non-ESP header */
-                       len = sizeof(struct udphdr);
-               } else
-                       /* Must be an IKE packet.. pass it through */
-                       return 1;
-               break;
-       case UDP_ENCAP_ESPINUDP_NON_IKE:
-               /* Check if this is a keepalive packet.  If so, eat it. */
-               if (len == 1 && udpdata[0] == 0xff) {
-                       return 0;
-               } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
-                          udpdata32[0] == 0 && udpdata32[1] == 0) {
-
-                       /* ESP Packet with Non-IKE marker */
-                       len = sizeof(struct udphdr) + 2 * sizeof(u32);
-               } else
-                       /* Must be an IKE packet.. pass it through */
-                       return 1;
-               break;
-       }
-
-       /* At this point we are sure that this is an ESPinUDP packet,
-        * so we need to remove 'len' bytes from the packet (the UDP
-        * header and optional ESP marker bytes) and then modify the
-        * protocol to ESP, and then call into the transform receiver.
-        */
-       if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-               return 0;
-
-       /* Now we can update and verify the packet length... */
-       iph = ip_hdr(skb);
-       iphlen = iph->ihl << 2;
-       iph->tot_len = htons(ntohs(iph->tot_len) - len);
-       if (skb->len < iphlen + len) {
-               /* packet is too small!?! */
-               return 0;
-       }
-
-       /* pull the data buffer up to the ESP header and set the
-        * transport header to point to ESP.  Keep UDP on the stack
-        * for later.
-        */
-       __skb_pull(skb, len);
-       skb_reset_transport_header(skb);
-
-       /* modify the protocol (it's ESP!) */
-       iph->protocol = IPPROTO_ESP;
-
-       /* and let the caller know to send this into the ESP processor... */
-       return -1;
-#endif
-}
-
 /* returns:
  *  -1: error
  *   0: success
@@ -1092,6 +965,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
        struct udp_sock *up = udp_sk(sk);
        int rc;
+       int is_udplite = IS_UDPLITE(sk);
 
        /*
         *      Charge it to the socket, dropping if the queue is full.
@@ -1102,35 +976,36 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
        if (up->encap_type) {
                /*
-                * This is an encapsulation socket, so let's see if this is
-                * an encapsulated packet.
-                * If it's a keepalive packet, then just eat it.
-                * If it's an encapsulateed packet, then pass it to the
-                * IPsec xfrm input and return the response
-                * appropriately.  Otherwise, just fall through and
-                * pass this up the UDP socket.
+                * This is an encapsulation socket so pass the skb to
+                * the socket's encap_rcv() hook. Otherwise, just
+                * fall through and pass this up the UDP socket.
+                * up->encap_rcv() returns the following value:
+                * =0 if skb was successfully passed to the encap
+                *    handler or was discarded by it.
+                * >0 if skb should be passed on to UDP.
+                * <0 if skb should be resubmitted as proto -N
                 */
-               int ret;
 
-               ret = udp_encap_rcv(sk, skb);
-               if (ret == 0) {
-                       /* Eat the packet .. */
-                       kfree_skb(skb);
-                       return 0;
-               }
-               if (ret < 0) {
-                       /* process the ESP packet */
-                       ret = xfrm4_rcv_encap(skb, up->encap_type);
-                       UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
-                       return -ret;
+               /* if we're overly short, let UDP handle it */
+               if (skb->len > sizeof(struct udphdr) &&
+                   up->encap_rcv != NULL) {
+                       int ret;
+
+                       ret = (*up->encap_rcv)(sk, skb);
+                       if (ret <= 0) {
+                               UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+                                                is_udplite);
+                               return -ret;
+                       }
                }
+
                /* FALLTHROUGH -- it's a UDP Packet */
        }
 
        /*
         *      UDP-Lite specific tests, ignored on UDP sockets
         */
-       if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+       if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
 
                /*
                 * MIB statistics other than incrementing the error count are
@@ -1171,15 +1046,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
        if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
                /* Note that an ENOMEM error is charged twice */
                if (rc == -ENOMEM)
-                       UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+                       UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
                goto drop;
        }
 
-       UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
        return 0;
 
 drop:
-       UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+       UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
        kfree_skb(skb);
        return -1;
 }
@@ -1195,45 +1069,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
                                    __be32 saddr, __be32 daddr,
                                    struct hlist_head udptable[])
 {
-       struct sock *sk, *skw, *sknext;
+       struct sock *sk;
        int dif;
-       int hport = ntohs(uh->dest);
-       unsigned int hash = hash_port_and_addr(hport, daddr);
-       unsigned int hashwild = hash_port_and_addr(hport, 0);
-
-       dif = skb->dev->ifindex;
 
        read_lock(&udp_hash_lock);
-
-       sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
-       skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
-
-       sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
-       if (!sk) {
-               hash = hashwild;
-               sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
-                       saddr, dif);
-       }
+       sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+       dif = skb->dev->ifindex;
+       sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
        if (sk) {
+               struct sock *sknext = NULL;
+
                do {
                        struct sk_buff *skb1 = skb;
-                       sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
-                                               daddr, uh->source, saddr, dif);
-                       if (!sknext && hash != hashwild) {
-                               hash = hashwild;
-                               sknext = udp_v4_mcast_next(skw, hash, hport,
-                                       daddr, uh->source, saddr, dif);
-                       }
+
+                       sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
+                                                  uh->source, saddr, dif);
                        if (sknext)
                                skb1 = skb_clone(skb, GFP_ATOMIC);
 
                        if (skb1) {
-                               int ret = udp_queue_rcv_skb(sk, skb1);
+                               int ret = 0;
+
+                               bh_lock_sock_nested(sk);
+                               if (!sock_owned_by_user(sk))
+                                       ret = udp_queue_rcv_skb(sk, skb1);
+                               else
+                                       sk_add_backlog(sk, skb1);
+                               bh_unlock_sock(sk);
+
                                if (ret > 0)
-                                       /*
-                                        * we should probably re-process
-                                        * instead of dropping packets here.
-                                        */
+                                       /* we should probably re-process instead
+                                        * of dropping packets here. */
                                        kfree_skb(skb1);
                        }
                        sk = sknext;
@@ -1320,10 +1186,16 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
                return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
        sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
-                              skb->dev->ifindex, udptable);
+                              inet_iif(skb), udptable);
 
        if (sk != NULL) {
-               int ret = udp_queue_rcv_skb(sk, skb);
+               int ret = 0;
+               bh_lock_sock_nested(sk);
+               if (!sock_owned_by_user(sk))
+                       ret = udp_queue_rcv_skb(sk, skb);
+               else
+                       sk_add_backlog(sk, skb);
+               bh_unlock_sock(sk);
                sock_put(sk);
 
                /* a return value > 0 means to resubmit the input, but
@@ -1404,6 +1276,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
        struct udp_sock *up = udp_sk(sk);
        int val;
        int err = 0;
+       int is_udplite = IS_UDPLITE(sk);
 
        if (optlen<sizeof(int))
                return -EINVAL;
@@ -1428,6 +1301,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
                case 0:
                case UDP_ENCAP_ESPINUDP:
                case UDP_ENCAP_ESPINUDP_NON_IKE:
+                       up->encap_rcv = xfrm4_udp_encap_rcv;
+                       /* FALLTHROUGH */
+               case UDP_ENCAP_L2TPINUDP:
                        up->encap_type = val;
                        break;
                default:
@@ -1442,7 +1318,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
        /* The sender sets actual checksum coverage length via this option.
         * The case coverage > packet length is handled by send module. */
        case UDPLITE_SEND_CSCOV:
-               if (!up->pcflag)         /* Disable the option on UDP sockets */
+               if (!is_udplite)         /* Disable the option on UDP sockets */
                        return -ENOPROTOOPT;
                if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
                        val = 8;
@@ -1454,7 +1330,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
         * sense, this should be set to at least 8 (as done below). If zero is
         * used, this again means full checksum coverage.                     */
        case UDPLITE_RECV_CSCOV:
-               if (!up->pcflag)         /* Disable the option on UDP sockets */
+               if (!is_udplite)         /* Disable the option on UDP sockets */
                        return -ENOPROTOOPT;
                if (val != 0 && val < 8) /* Avoid silly minimal values.       */
                        val = 8;
@@ -1595,6 +1471,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 }
 
+DEFINE_PROTO_INUSE(udp)
+
 struct proto udp_prot = {
        .name              = "UDP",
        .owner             = THIS_MODULE,
@@ -1612,11 +1490,16 @@ struct proto udp_prot = {
        .hash              = udp_lib_hash,
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v4_get_port,
+       .memory_allocated  = &udp_memory_allocated,
+       .sysctl_mem        = sysctl_udp_mem,
+       .sysctl_wmem       = &sysctl_udp_wmem_min,
+       .sysctl_rmem       = &sysctl_udp_rmem_min,
        .obj_size          = sizeof(struct udp_sock),
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
+       REF_PROTO_INUSE(udp)
 };
 
 /* ------------------------------------------------------------------------ */
@@ -1733,7 +1616,7 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo)
        afinfo->seq_fops->llseek        = seq_lseek;
        afinfo->seq_fops->release       = seq_release_private;
 
-       p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+       p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
        if (p)
                p->data = afinfo;
        else
@@ -1745,7 +1628,7 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
 {
        if (!afinfo)
                return;
-       proc_net_remove(afinfo->name);
+       proc_net_remove(&init_net, afinfo->name);
        memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
 }
 
@@ -1806,6 +1689,25 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+void __init udp_init(void)
+{
+       unsigned long limit;
+
+       /* Size the UDP memory limits with the same strategy as TCP. limit is
+        * a fraction of global memory: 1/2 from 256 MB upward, shrinking
+        * toward zero on smaller systems, with a floor of 128 pages.
+        */
+       limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+       limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+       limit = max(limit, 128UL);
+       sysctl_udp_mem[0] = limit / 4 * 3;          /* 3/4 of limit */
+       sysctl_udp_mem[1] = limit;
+       sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;  /* about 3/2 of limit */
+
+       sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+       sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+}
+
 EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);