xfs: remove nr_to_write writeback windup.
[safe/jmp/linux-2.6] / net / ipv4 / af_inet.c
index 538e84d..551ce56 100644 (file)
@@ -86,6 +86,7 @@
 #include <linux/poll.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
        WARN_ON(sk->sk_forward_alloc);
 
        kfree(inet->opt);
-       dst_release(sk->sk_dst_cache);
+       dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
        sk_refcnt_debug_dec(sk);
 }
 EXPORT_SYMBOL(inet_sock_destruct);
@@ -262,7 +263,8 @@ static inline int inet_netns_ok(struct net *net, int protocol)
  *     Create an inet socket.
  */
 
-static int inet_create(struct net *net, struct socket *sock, int protocol)
+static int inet_create(struct net *net, struct socket *sock, int protocol,
+                      int kern)
 {
        struct sock *sk;
        struct inet_protosw *answer;
@@ -325,7 +327,7 @@ lookup_protocol:
        }
 
        err = -EPERM;
-       if (answer->capability > 0 && !capable(answer->capability))
+       if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
                goto out_rcu_unlock;
 
        err = -EAFNOSUPPORT;
@@ -417,6 +419,8 @@ int inet_release(struct socket *sock)
        if (sk) {
                long timeout;
 
+               sock_rps_reset_flow(sk);
+
                /* Applications forget to leave groups before exiting */
                ip_mc_drop_socket(sk);
 
@@ -529,6 +533,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 {
        struct sock *sk = sock->sk;
 
+       if (addr_len < sizeof(uaddr->sa_family))
+               return -EINVAL;
        if (uaddr->sa_family == AF_UNSPEC)
                return sk->sk_prot->disconnect(sk, flags);
 
@@ -542,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
        DEFINE_WAIT(wait);
 
-       prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
        /* Basic assumption: if someone sets sk->sk_err, he _must_
         * change state of the socket from TCP_SYN_*.
@@ -555,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
                lock_sock(sk);
                if (signal_pending(current) || !timeo)
                        break;
-               prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        }
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return timeo;
 }
 
@@ -572,6 +578,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
        int err;
        long timeo;
 
+       if (addr_len < sizeof(uaddr->sa_family))
+               return -EINVAL;
+
        lock_sock(sk);
 
        if (uaddr->sa_family == AF_UNSPEC) {
@@ -713,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
 
+       sock_rps_record_flow(sk);
+
        /* We may need to bind the socket. */
        if (!inet_sk(sk)->inet_num && inet_autobind(sk))
                return -EAGAIN;
@@ -721,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-
 static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
                             size_t size, int flags)
 {
        struct sock *sk = sock->sk;
 
+       sock_rps_record_flow(sk);
+
        /* We may need to bind the socket. */
        if (!inet_sk(sk)->inet_num && inet_autobind(sk))
                return -EAGAIN;
@@ -736,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
        return sock_no_sendpage(sock, page, offset, size, flags);
 }
 
+int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+                size_t size, int flags)
+{      /*
+        * Receive handler wired into .recvmsg of inet_stream_ops,
+        * inet_dgram_ops and inet_sockraw_ops.  Calls
+        * sock_rps_record_flow() on the socket, then forwards to the
+        * protocol's own recvmsg and returns its result unchanged.
+        */
+       struct sock *sk = sock->sk;
+       int addr_len = 0;       /* passed by address to the protocol's recvmsg */
+       int err;
+
+       sock_rps_record_flow(sk);       /* done before handing off to the protocol */
+
+       err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,   /* MSG_DONTWAIT bit goes in its own argument... */
+                                  flags & ~MSG_DONTWAIT, &addr_len);   /* ...and is cleared from the flags passed on */
+       if (err >= 0)   /* msg_namelen is only written for a non-negative result */
+               msg->msg_namelen = addr_len;
+       return err;
+}
+EXPORT_SYMBOL(inet_recvmsg);
 
 int inet_shutdown(struct socket *sock, int how)
 {
@@ -865,7 +893,7 @@ const struct proto_ops inet_stream_ops = {
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = tcp_sendmsg,
-       .recvmsg           = sock_common_recvmsg,
+       .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = tcp_sendpage,
        .splice_read       = tcp_splice_read,
@@ -892,7 +920,7 @@ const struct proto_ops inet_dgram_ops = {
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet_sendmsg,
-       .recvmsg           = sock_common_recvmsg,
+       .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -922,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = {
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet_sendmsg,
-       .recvmsg           = sock_common_recvmsg,
+       .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -947,7 +975,6 @@ static struct inet_protosw inetsw_array[] =
                .protocol =   IPPROTO_TCP,
                .prot =       &tcp_prot,
                .ops =        &inet_stream_ops,
-               .capability = -1,
                .no_check =   0,
                .flags =      INET_PROTOSW_PERMANENT |
                              INET_PROTOSW_ICSK,
@@ -958,7 +985,6 @@ static struct inet_protosw inetsw_array[] =
                .protocol =   IPPROTO_UDP,
                .prot =       &udp_prot,
                .ops =        &inet_dgram_ops,
-               .capability = -1,
                .no_check =   UDP_CSUM_DEFAULT,
                .flags =      INET_PROTOSW_PERMANENT,
        },
@@ -969,7 +995,6 @@ static struct inet_protosw inetsw_array[] =
               .protocol =   IPPROTO_IP,        /* wild card */
               .prot =       &raw_prot,
               .ops =        &inet_sockraw_ops,
-              .capability = CAP_NET_RAW,
               .no_check =   UDP_CSUM_DEFAULT,
               .flags =      INET_PROTOSW_REUSE,
        }
@@ -1298,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
        if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
                goto out_unlock;
 
-       id = ntohl(*(u32 *)&iph->id);
-       flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+       id = ntohl(*(__be32 *)&iph->id);
+       flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
        id >>= 16;
 
        for (p = *head; p; p = p->next) {
@@ -1312,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
                if ((iph->protocol ^ iph2->protocol) |
                    (iph->tos ^ iph2->tos) |
-                   (iph->saddr ^ iph2->saddr) |
-                   (iph->daddr ^ iph2->daddr)) {
+                   ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
+                   ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }
@@ -1387,7 +1412,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 {
        unsigned long res = 0;
        int i;
@@ -1400,13 +1425,13 @@ unsigned long snmp_fold_field(void *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void *ptr[2], size_t mibsize)
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
 {
        BUG_ON(ptr == NULL);
-       ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+       ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
        if (!ptr[0])
                goto err0;
-       ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+       ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
        if (!ptr[1])
                goto err1;
        return 0;
@@ -1418,7 +1443,7 @@ err0:
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void *ptr[2])
+void snmp_mib_free(void __percpu *ptr[2])
 {
        BUG_ON(ptr == NULL);
        free_percpu(ptr[0]);
@@ -1462,25 +1487,25 @@ static const struct net_protocol icmp_protocol = {
 
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
-       if (snmp_mib_init((void **)net->mib.tcp_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
                          sizeof(struct tcp_mib)) < 0)
                goto err_tcp_mib;
-       if (snmp_mib_init((void **)net->mib.ip_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
                          sizeof(struct ipstats_mib)) < 0)
                goto err_ip_mib;
-       if (snmp_mib_init((void **)net->mib.net_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
                          sizeof(struct linux_mib)) < 0)
                goto err_net_mib;
-       if (snmp_mib_init((void **)net->mib.udp_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
                          sizeof(struct udp_mib)) < 0)
                goto err_udp_mib;
-       if (snmp_mib_init((void **)net->mib.udplite_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
                          sizeof(struct udp_mib)) < 0)
                goto err_udplite_mib;
-       if (snmp_mib_init((void **)net->mib.icmp_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
                          sizeof(struct icmp_mib)) < 0)
                goto err_icmp_mib;
-       if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
+       if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
                          sizeof(struct icmpmsg_mib)) < 0)
                goto err_icmpmsg_mib;
 
@@ -1488,30 +1513,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
        return 0;
 
 err_icmpmsg_mib:
-       snmp_mib_free((void **)net->mib.icmp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
 err_icmp_mib:
-       snmp_mib_free((void **)net->mib.udplite_statistics);
+       snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
 err_udplite_mib:
-       snmp_mib_free((void **)net->mib.udp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.udp_statistics);
 err_udp_mib:
-       snmp_mib_free((void **)net->mib.net_statistics);
+       snmp_mib_free((void __percpu **)net->mib.net_statistics);
 err_net_mib:
-       snmp_mib_free((void **)net->mib.ip_statistics);
+       snmp_mib_free((void __percpu **)net->mib.ip_statistics);
 err_ip_mib:
-       snmp_mib_free((void **)net->mib.tcp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
 err_tcp_mib:
        return -ENOMEM;
 }
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
-       snmp_mib_free((void **)net->mib.icmpmsg_statistics);
-       snmp_mib_free((void **)net->mib.icmp_statistics);
-       snmp_mib_free((void **)net->mib.udplite_statistics);
-       snmp_mib_free((void **)net->mib.udp_statistics);
-       snmp_mib_free((void **)net->mib.net_statistics);
-       snmp_mib_free((void **)net->mib.ip_statistics);
-       snmp_mib_free((void **)net->mib.tcp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
+       snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+       snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+       snmp_mib_free((void __percpu **)net->mib.net_statistics);
+       snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+       snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1548,9 +1573,13 @@ static int __init inet_init(void)
 
        BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
+       sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+       if (!sysctl_local_reserved_ports)
+               goto out;
+
        rc = proto_register(&tcp_prot, 1);
        if (rc)
-               goto out;
+               goto out_free_reserved_ports;
 
        rc = proto_register(&udp_prot, 1);
        if (rc)
@@ -1649,6 +1678,8 @@ out_unregister_udp_proto:
        proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
        proto_unregister(&tcp_prot);
+out_free_reserved_ports:
+       kfree(sysctl_local_reserved_ports);
        goto out;
 }