diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ecf3be9..b03ecf6 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -20,6 +20,7 @@
 #include <linux/if_arp.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <net/checksum.h>
 
 #include <net/inet_sock.h>
@@ -40,16 +41,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
 
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
 EXPORT_SYMBOL_GPL(dccp_orphan_count);
 
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
-       .lhash_lock     = RW_LOCK_UNLOCKED,
-       .lhash_users    = ATOMIC_INIT(0),
-       .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
 EXPORT_SYMBOL_GPL(dccp_hashinfo);
 
 /* the maximum queue length for tx in packets. 0 is no limit */
@@ -130,7 +125,7 @@ EXPORT_SYMBOL_GPL(dccp_done);
 
 const char *dccp_packet_name(const int type)
 {
-       static const char *dccp_packet_names[] = {
+       static const char *const dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
@@ -153,7 +148,7 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
 
 const char *dccp_state_name(const int state)
 {
-       static char *dccp_state_names[] = {
+       static const char *const dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
@@ -185,11 +180,11 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
-       dp->dccps_mss_cache     = TCP_MIN_RCVMSS;
+       dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
-       dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
+       dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
 
        dccp_init_xmit_timers(sk);
 
@@ -284,7 +279,7 @@ int dccp_disconnect(struct sock *sk, int flags)
                sk->sk_send_head = NULL;
        }
 
-       inet->dport = 0;
+       inet->inet_dport = 0;
 
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);
@@ -296,7 +291,7 @@ int dccp_disconnect(struct sock *sk, int flags)
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);
 
-       WARN_ON(inet->num && !icsk->icsk_bind_hash);
+       WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
        sk->sk_error_report(sk);
        return err;
@@ -317,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
        unsigned int mask;
        struct sock *sk = sock->sk;
 
-       poll_wait(file, sk->sk_sleep, wait);
+       sock_poll_wait(file, sk_sleep(sk), wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);
 
@@ -399,7 +394,7 @@ out:
 EXPORT_SYMBOL_GPL(dccp_ioctl);
 
 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
-                                  char __user *optval, int optlen)
+                                  char __user *optval, unsigned int optlen)
 {
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;
@@ -470,7 +465,7 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 }
 
 static int dccp_setsockopt_ccid(struct sock *sk, int type,
-                               char __user *optval, int optlen)
+                               char __user *optval, unsigned int optlen)
 {
        u8 *val;
        int rc = 0;
@@ -500,7 +495,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
 }
 
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
-               char __user *optval, int optlen)
+               char __user *optval, unsigned int optlen)
 {
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;
@@ -542,20 +537,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
        case DCCP_SOCKOPT_RECV_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, true);
                break;
-       case DCCP_SOCKOPT_QPOLICY_ID:
-               if (sk->sk_state != DCCP_CLOSED)
-                       err = -EISCONN;
-               else if (val < 0 || val >= DCCPQ_POLICY_MAX)
-                       err = -EINVAL;
-               else
-                       dp->dccps_qpolicy = val;
-               break;
-       case DCCP_SOCKOPT_QPOLICY_TXQLEN:
-               if (val < 0)
-                       err = -EINVAL;
-               else
-                       dp->dccps_tx_qlen = val;
-               break;
        default:
                err = -ENOPROTOOPT;
                break;
@@ -566,7 +547,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 }
 
 int dccp_setsockopt(struct sock *sk, int level, int optname,
-                   char __user *optval, int optlen)
+                   char __user *optval, unsigned int optlen)
 {
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
@@ -579,7 +560,7 @@ EXPORT_SYMBOL_GPL(dccp_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
-                          char __user *optval, int optlen)
+                          char __user *optval, unsigned int optlen)
 {
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
@@ -663,12 +644,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
-       case DCCP_SOCKOPT_QPOLICY_ID:
-               val = dp->dccps_qpolicy;
-               break;
-       case DCCP_SOCKOPT_QPOLICY_TXQLEN:
-               val = dp->dccps_tx_qlen;
-               break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
@@ -711,47 +686,6 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 #endif
 
-static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
-{
-       struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
-
-       /*
-        * Assign an (opaque) qpolicy priority value to skb->priority.
-        *
-        * We are overloading this skb field for use with the qpolicy subystem.
-        * The skb->priority is normally used for the SO_PRIORITY option, which
-        * is initialised from sk_priority. Since the assignment of sk_priority
-        * to skb->priority happens later (on layer 3), we overload this field
-        * for use with queueing priorities as long as the skb is on layer 4.
-        * The default priority value (if nothing is set) is 0.
-        */
-       skb->priority = 0;
-
-       for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
-
-               if (!CMSG_OK(msg, cmsg))
-                       return -EINVAL;
-
-               if (cmsg->cmsg_level != SOL_DCCP)
-                       continue;
-
-               if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
-                   !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
-                       return -EINVAL;
-
-               switch (cmsg->cmsg_type) {
-               case DCCP_SCM_PRIORITY:
-                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
-                               return -EINVAL;
-                       skb->priority = *(__u32 *)CMSG_DATA(cmsg);
-                       break;
-               default:
-                       return -EINVAL;
-               }
-       }
-       return 0;
-}
-
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
 {
@@ -767,7 +701,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        lock_sock(sk);
 
-       if (dccp_qpolicy_full(sk)) {
+       if (sysctl_dccp_tx_qlen &&
+           (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }
@@ -795,12 +730,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        if (rc != 0)
                goto out_discard;
 
-       rc = dccp_msghdr_parse(msg, skb);
-       if (rc != 0)
-               goto out_discard;
-
-       dccp_qpolicy_push(sk, skb);
-       dccp_write_xmit(sk);
+       skb_queue_tail(&sk->sk_write_queue, skb);
+       dccp_write_xmit(sk,0);
 out_release:
        release_sock(sk);
        return rc ? : len;
@@ -905,6 +836,8 @@ verify_sock_status:
                        len = -EFAULT;
                        break;
                }
+               if (flags & MSG_TRUNC)
+                       len = skb->len;
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
@@ -1023,29 +956,15 @@ void dccp_close(struct sock *sk, long timeout)
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
-               /*
-                * Normal connection termination. May need to wait if there are
-                * still packets in the TX queue that are delayed by the CCID.
-                */
-               dccp_flush_write_queue(sk, &timeout);
                dccp_terminate_connection(sk);
        }
 
-       /*
-        * Flush write queue. This may be necessary in several cases:
-        * - we have been closed by the peer but still have application data;
-        * - abortive termination (unread data or zero linger time),
-        * - normal termination but queue could not be flushed within time limit
-        */
-       __skb_queue_purge(&sk->sk_write_queue);
-
        sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
-       atomic_inc(sk->sk_prot->orphan_count);
 
        /*
         * It is the last release_sock in its life. It will remove backlog.
@@ -1059,6 +978,8 @@ adjudge_to_death:
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));
 
+       percpu_counter_inc(sk->sk_prot->orphan_count);
+
        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;
@@ -1085,12 +1006,13 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
 
 static inline int dccp_mib_init(void)
 {
-       return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
+       return snmp_mib_init((void __percpu **)dccp_statistics,
+                            sizeof(struct dccp_mib));
 }
 
 static inline void dccp_mib_exit(void)
 {
-       snmp_mib_free((void**)dccp_statistics);
+       snmp_mib_free((void __percpu **)dccp_statistics);
 }
 
 static int thash_entries;
@@ -1109,17 +1031,21 @@ static int __init dccp_init(void)
 {
        unsigned long goal;
        int ehash_order, bhash_order, i;
-       int rc = -ENOBUFS;
+       int rc;
 
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));
-
+       rc = percpu_counter_init(&dccp_orphan_count, 0);
+       if (rc)
+               goto out_fail;
+       rc = -ENOBUFS;
+       inet_hashinfo_init(&dccp_hashinfo);
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
-               goto out;
+               goto out_free_percpu;
 
        /*
         * Size and allocate the main established and bind bucket
@@ -1127,10 +1053,10 @@ static int __init dccp_init(void)
         *
         * The methodology is similar to that of the buffer cache.
         */
-       if (num_physpages >= (128 * 1024))
-               goal = num_physpages >> (21 - PAGE_SHIFT);
+       if (totalram_pages >= (128 * 1024))
+               goal = totalram_pages >> (21 - PAGE_SHIFT);
        else
-               goal = num_physpages >> (23 - PAGE_SHIFT);
+               goal = totalram_pages >> (23 - PAGE_SHIFT);
 
        if (thash_entries)
                goal = (thash_entries *
@@ -1138,13 +1064,14 @@ static int __init dccp_init(void)
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
-               dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
+               unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
-               while (dccp_hashinfo.ehash_size &
-                      (dccp_hashinfo.ehash_size - 1))
-                       dccp_hashinfo.ehash_size--;
+
+               while (hash_size & (hash_size - 1))
+                       hash_size--;
+               dccp_hashinfo.ehash_mask = hash_size - 1;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
-                       __get_free_pages(GFP_ATOMIC, ehash_order);
+                       __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
 
        if (!dccp_hashinfo.ehash) {
@@ -1152,9 +1079,9 @@ static int __init dccp_init(void)
                goto out_free_bind_bucket_cachep;
        }
 
-       for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
-               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
-               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
+       for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
+               INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
+               INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
        }
 
        if (inet_ehash_locks_alloc(&dccp_hashinfo))
@@ -1169,7 +1096,7 @@ static int __init dccp_init(void)
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
-                       __get_free_pages(GFP_ATOMIC, bhash_order);
+                       __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
 
        if (!dccp_hashinfo.bhash) {
@@ -1194,40 +1121,52 @@ static int __init dccp_init(void)
        if (rc)
                goto out_ackvec_exit;
 
+       rc = ccid_initialize_builtins();
+       if (rc)
+               goto out_sysctl_exit;
+
        dccp_timestamping_init();
-out:
-       return rc;
+
+       return 0;
+
+out_sysctl_exit:
+       dccp_sysctl_exit();
 out_ackvec_exit:
        dccp_ackvec_exit();
 out_free_dccp_mib:
        dccp_mib_exit();
 out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
-       dccp_hashinfo.bhash = NULL;
 out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
 out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-       dccp_hashinfo.ehash = NULL;
 out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+out_free_percpu:
+       percpu_counter_destroy(&dccp_orphan_count);
+out_fail:
+       dccp_hashinfo.bhash = NULL;
+       dccp_hashinfo.ehash = NULL;
        dccp_hashinfo.bind_bucket_cachep = NULL;
-       goto out;
+       return rc;
 }
 
 static void __exit dccp_fini(void)
 {
+       ccid_cleanup_builtins();
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
-                  get_order(dccp_hashinfo.ehash_size *
+                  get_order((dccp_hashinfo.ehash_mask + 1) *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
+       percpu_counter_destroy(&dccp_orphan_count);
 }
 
 module_init(dccp_init);