[DCCP] ackvec: Ditch dccpav_buf_len
[safe/jmp/linux-2.6] / net / dccp / ipv4.c
index 40fe6af..f8df565 100644 (file)
@@ -19,7 +19,9 @@
 
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
 #include <net/sock.h>
+#include <net/timewait_sock.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
@@ -31,15 +33,14 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-       .portalloc_lock = SPIN_LOCK_UNLOCKED,
-       .port_rover     = 1024 - 1,
 };
 
 EXPORT_SYMBOL_GPL(dccp_hashinfo);
 
 static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
 {
-       return inet_csk_get_port(&dccp_hashinfo, sk, snum);
+       return inet_csk_get_port(&dccp_hashinfo, sk, snum,
+                                inet_csk_bind_conflict);
 }
 
 static void dccp_v4_hash(struct sock *sk)
@@ -47,195 +48,14 @@ static void dccp_v4_hash(struct sock *sk)
        inet_hash(&dccp_hashinfo, sk);
 }
 
-static void dccp_v4_unhash(struct sock *sk)
+void dccp_unhash(struct sock *sk)
 {
        inet_unhash(&dccp_hashinfo, sk);
 }
 
-/* called with local bh disabled */
-static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
-                                     struct inet_timewait_sock **twp)
-{
-       struct inet_sock *inet = inet_sk(sk);
-       const u32 daddr = inet->rcv_saddr;
-       const u32 saddr = inet->daddr;
-       const int dif = sk->sk_bound_dev_if;
-       INET_ADDR_COOKIE(acookie, saddr, daddr)
-       const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-       const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
-                                     dccp_hashinfo.ehash_size);
-       struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
-       const struct sock *sk2;
-       const struct hlist_node *node;
-       struct inet_timewait_sock *tw;
-
-       write_lock(&head->lock);
-
-       /* Check TIME-WAIT sockets first. */
-       sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
-               tw = inet_twsk(sk2);
-
-               if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
-                       goto not_unique;
-       }
-       tw = NULL;
-
-       /* And established part... */
-       sk_for_each(sk2, node, &head->chain) {
-               if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
-                       goto not_unique;
-       }
+EXPORT_SYMBOL_GPL(dccp_unhash);
 
-       /* Must record num and sport now. Otherwise we will see
-        * in hash table socket with a funny identity. */
-       inet->num = lport;
-       inet->sport = htons(lport);
-       sk->sk_hashent = hash;
-       BUG_TRAP(sk_unhashed(sk));
-       __sk_add_node(sk, &head->chain);
-       sock_prot_inc_use(sk->sk_prot);
-       write_unlock(&head->lock);
-
-       if (twp != NULL) {
-               *twp = tw;
-               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-       } else if (tw != NULL) {
-               /* Silly. Should hash-dance instead... */
-               inet_twsk_deschedule(tw, &dccp_death_row);
-               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-               inet_twsk_put(tw);
-       }
-
-       return 0;
-
-not_unique:
-       write_unlock(&head->lock);
-       return -EADDRNOTAVAIL;
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static int dccp_v4_hash_connect(struct sock *sk)
-{
-       const unsigned short snum = inet_sk(sk)->num;
-       struct inet_bind_hashbucket *head;
-       struct inet_bind_bucket *tb;
-       int ret;
-
-       if (snum == 0) {
-               int rover;
-               int low = sysctl_local_port_range[0];
-               int high = sysctl_local_port_range[1];
-               int remaining = (high - low) + 1;
-               struct hlist_node *node;
-               struct inet_timewait_sock *tw = NULL;
-
-               local_bh_disable();
-
-               /* TODO. Actually it is not so bad idea to remove
-                * dccp_hashinfo.portalloc_lock before next submission to
-                * Linus.
-                * As soon as we touch this place at all it is time to think.
-                *
-                * Now it protects single _advisory_ variable
-                * dccp_hashinfo.port_rover, hence it is mostly useless.
-                * Code will work nicely if we just delete it, but
-                * I am afraid in contented case it will work not better or
-                * even worse: another cpu just will hit the same bucket
-                * and spin there.
-                * So some cpu salt could remove both contention and
-                * memory pingpong. Any ideas how to do this in a nice way?
-                */
-               spin_lock(&dccp_hashinfo.portalloc_lock);
-               rover = dccp_hashinfo.port_rover;
-
-               do {
-                       rover++;
-                       if ((rover < low) || (rover > high))
-                               rover = low;
-                       head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
-                                                   dccp_hashinfo.bhash_size)];
-                       spin_lock(&head->lock);
-
-                       /* Does not bother with rcv_saddr checks,
-                        * because the established check is already
-                        * unique enough.
-                        */
-                       inet_bind_bucket_for_each(tb, node, &head->chain) {
-                               if (tb->port == rover) {
-                                       BUG_TRAP(!hlist_empty(&tb->owners));
-                                       if (tb->fastreuse >= 0)
-                                               goto next_port;
-                                       if (!__dccp_v4_check_established(sk,
-                                                                        rover,
-                                                                        &tw))
-                                               goto ok;
-                                       goto next_port;
-                               }
-                       }
-
-                       tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
-                                                    head, rover);
-                       if (tb == NULL) {
-                               spin_unlock(&head->lock);
-                               break;
-                       }
-                       tb->fastreuse = -1;
-                       goto ok;
-
-               next_port:
-                       spin_unlock(&head->lock);
-               } while (--remaining > 0);
-               dccp_hashinfo.port_rover = rover;
-               spin_unlock(&dccp_hashinfo.portalloc_lock);
-
-               local_bh_enable();
-
-               return -EADDRNOTAVAIL;
-
-ok:
-               /* All locks still held and bhs disabled */
-               dccp_hashinfo.port_rover = rover;
-               spin_unlock(&dccp_hashinfo.portalloc_lock);
-
-               inet_bind_hash(sk, tb, rover);
-               if (sk_unhashed(sk)) {
-                       inet_sk(sk)->sport = htons(rover);
-                       __inet_hash(&dccp_hashinfo, sk, 0);
-               }
-               spin_unlock(&head->lock);
-
-               if (tw != NULL) {
-                       inet_twsk_deschedule(tw, &dccp_death_row);
-                       inet_twsk_put(tw);
-               }
-
-               ret = 0;
-               goto out;
-       }
-
-       head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
-                                                dccp_hashinfo.bhash_size)];
-       tb   = inet_csk(sk)->icsk_bind_hash;
-       spin_lock_bh(&head->lock);
-       if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
-               __inet_hash(&dccp_hashinfo, sk, 0);
-               spin_unlock_bh(&head->lock);
-               return 0;
-       } else {
-               spin_unlock(&head->lock);
-               /* No definite answer... Walk to established hash table */
-               ret = __dccp_v4_check_established(sk, snum, NULL);
-out:
-               local_bh_enable();
-               return ret;
-       }
-}
-
-static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
-                          int addr_len)
+int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct dccp_sock *dp = dccp_sk(sk);
@@ -285,9 +105,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
        inet->dport = usin->sin_port;
        inet->daddr = daddr;
 
-       dp->dccps_ext_header_len = 0;
+       inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet->opt != NULL)
-               dp->dccps_ext_header_len = inet->opt->optlen;
+               inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
        /*
         * Socket identity is still unknown (sport may be zero).
         * However we set state to DCCP_REQUESTING and not releasing socket
@@ -295,11 +115,12 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
         * complete initialization after this.
         */
        dccp_set_state(sk, DCCP_REQUESTING);
-       err = dccp_v4_hash_connect(sk);
+       err = inet_hash_connect(&dccp_death_row, sk);
        if (err != 0)
                goto failure;
 
-       err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+       err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport,
+                               sk);
        if (err != 0)
                goto failure;
 
@@ -313,16 +134,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
                                                            usin->sin_port);
        dccp_update_gss(sk, dp->dccps_iss);
 
-       /*
-        * SWL and AWL are initially adjusted so that they are not less than
-        * the initial Sequence Numbers received and sent, respectively:
-        *      SWL := max(GSR + 1 - floor(W/4), ISR),
-        *      AWL := max(GSS - W' + 1, ISS).
-        * These adjustments MUST be applied only at the beginning of the
-        * connection.
-        */
-       dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
-
        inet->id = dp->dccps_iss ^ jiffies;
 
        err = dccp_connect(sk);
@@ -342,6 +153,8 @@ failure:
        goto out;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_connect);
+
 /*
  * This routine does path mtu discovery as defined in RFC1191.
  */
@@ -380,7 +193,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
        mtu = dst_mtu(dst);
 
        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-           dp->dccps_pmtu_cookie > mtu) {
+           inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                dccp_sync_mss(sk, mtu);
 
                /*
@@ -463,6 +276,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
        if (skb != NULL) {
                const struct inet_request_sock *ireq = inet_rsk(req);
 
+               memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
                err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
                                            ireq->rmt_addr,
                                            ireq->opt);
@@ -631,6 +445,17 @@ out:
        sock_put(sk);
 }
 
+/* This routine computes an IPv4 DCCP checksum. */
+void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       struct dccp_hdr *dh = dccp_hdr(skb);
+
+       dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_send_check);
+
 int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
 {
        struct sk_buff *skb;
@@ -647,6 +472,7 @@ int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
        if (skb != NULL) {
                const struct inet_sock *inet = inet_sk(sk);
 
+               memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
                err = ip_build_and_send_pkt(skb, sk,
                                            inet->saddr, inet->daddr, NULL);
                if (err == NET_XMIT_CN)
@@ -665,16 +491,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
                                           dccp_hdr(skb)->dccph_sport);
 }
 
-static inline int dccp_bad_service_code(const struct sock *sk,
-                                       const __u32 service)
-{
-       const struct dccp_sock *dp = dccp_sk(sk);
-
-       if (dp->dccps_service == service)
-               return 0;
-       return !dccp_list_has_service(dp->dccps_service_list, service);
-}
-
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
        struct inet_request_sock *ireq;
@@ -686,7 +502,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
        struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
        __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
-       struct dst_entry *dst = NULL;
 
        /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
        if (((struct rtable *)skb->dst)->rt_flags &
@@ -727,7 +542,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        ireq = inet_rsk(req);
        ireq->loc_addr = daddr;
        ireq->rmt_addr = saddr;
-       /* FIXME: Merge Aristeu's option parsing code when ready */
        req->rcv_wnd    = 100; /* Fake, option parsing will get the
                                  right value */
        ireq->opt       = NULL;
@@ -745,23 +559,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        dreq->dreq_iss     = dccp_v4_init_sequence(sk, skb);
        dreq->dreq_service = service;
 
-       if (dccp_v4_send_response(sk, req, dst))
+       if (dccp_v4_send_response(sk, req, NULL))
                goto drop_and_free;
 
        inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
        return 0;
 
 drop_and_free:
-       /*
-        * FIXME: should be reqsk_free after implementing req->rsk_ops
-        */
-       __reqsk_free(req);
+       reqsk_free(req);
 drop:
        DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
        dcb->dccpd_reset_code = reset_code;
        return -1;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
 /*
  * The three way handshake has completed - we got a valid ACK or DATAACK -
  * now create the new socket.
@@ -816,6 +629,8 @@ exit:
        return NULL;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
+
 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
        const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -1035,7 +850,9 @@ discard:
        return 0;
 }
 
-static inline int dccp_invalid_packet(struct sk_buff *skb)
+EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
+
+int dccp_invalid_packet(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
 
@@ -1089,29 +906,30 @@ static inline int dccp_invalid_packet(struct sk_buff *skb)
                return 1;
        }
 
-       /* If the header checksum is incorrect, drop packet and return */
-       if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
-                                   skb->nh.iph->daddr) < 0) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
-                                           "incorrect\n");
-               return 1;
-       }
-
        return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_invalid_packet);
+
 /* this is called when real data arrives */
 int dccp_v4_rcv(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
        struct sock *sk;
-       int rc;
 
        /* Step 1: Check header basics: */
 
        if (dccp_invalid_packet(skb))
                goto discard_it;
 
+       /* If the header checksum is incorrect, drop packet and return */
+       if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
+                                   skb->nh.iph->daddr) < 0) {
+               LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
+                              __FUNCTION__);
+               goto discard_it;
+       }
+
        dh = dccp_hdr(skb);
 
        DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
@@ -1167,28 +985,11 @@ int dccp_v4_rcv(struct sk_buff *skb)
                 goto do_time_wait;
        }
 
-       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
-               dccp_pr_debug("xfrm4_policy_check failed\n");
+       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
-       }
-
-        if (sk_filter(sk, skb, 0)) {
-               dccp_pr_debug("sk_filter failed\n");
-                goto discard_and_relse;
-       }
-
-       skb->dev = NULL;
-
-       bh_lock_sock(sk);
-       rc = 0;
-       if (!sock_owned_by_user(sk))
-               rc = dccp_v4_do_rcv(sk, skb);
-       else
-               sk_add_backlog(sk, skb);
-       bh_unlock_sock(sk);
+       nf_reset(skb);
 
-       sock_put(sk);
-       return rc;
+       return sk_receive_skb(sk, skb);
 
 no_dccp_socket:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -1218,17 +1019,30 @@ do_time_wait:
        goto no_dccp_socket;
 }
 
-static int dccp_v4_init_sock(struct sock *sk)
+struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
+       .queue_xmit     = ip_queue_xmit,
+       .send_check     = dccp_v4_send_check,
+       .rebuild_header = inet_sk_rebuild_header,
+       .conn_request   = dccp_v4_conn_request,
+       .syn_recv_sock  = dccp_v4_request_recv_sock,
+       .net_header_len = sizeof(struct iphdr),
+       .setsockopt     = ip_setsockopt,
+       .getsockopt     = ip_getsockopt,
+       .addr2sockaddr  = inet_csk_addr2sockaddr,
+       .sockaddr_len   = sizeof(struct sockaddr_in),
+};
+
+int dccp_v4_init_sock(struct sock *sk)
 {
        struct dccp_sock *dp = dccp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
        static int dccp_ctl_socket_init = 1;
 
        dccp_options_init(&dp->dccps_options);
        do_gettimeofday(&dp->dccps_epoch);
 
        if (dp->dccps_options.dccpo_send_ack_vector) {
-               dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
-                                                          GFP_KERNEL);
+               dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                if (dp->dccps_hc_rx_ackvec == NULL)
                        return -ENOMEM;
        }
@@ -1260,9 +1074,11 @@ static int dccp_v4_init_sock(struct sock *sk)
                dccp_ctl_socket_init = 0;
 
        dccp_init_xmit_timers(sk);
-       inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+       icsk->icsk_rto = DCCP_TIMEOUT_INIT;
        sk->sk_state = DCCP_CLOSED;
        sk->sk_write_space = dccp_write_space;
+       icsk->icsk_af_ops = &dccp_ipv4_af_ops;
+       icsk->icsk_sync_mss = dccp_sync_mss;
        dp->dccps_mss_cache = 536;
        dp->dccps_role = DCCP_ROLE_UNDEFINED;
        dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
@@ -1270,12 +1086,14 @@ static int dccp_v4_init_sock(struct sock *sk)
        return 0;
 }
 
-static int dccp_v4_destroy_sock(struct sock *sk)
+EXPORT_SYMBOL_GPL(dccp_v4_init_sock);
+
+int dccp_v4_destroy_sock(struct sock *sk)
 {
        struct dccp_sock *dp = dccp_sk(sk);
 
        /*
-        * DCCP doesn't use sk_qrite_queue, just sk_send_head
+        * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
@@ -1287,10 +1105,8 @@ static int dccp_v4_destroy_sock(struct sock *sk)
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);
 
-       if (dp->dccps_service_list != NULL) {
-               kfree(dp->dccps_service_list);
-               dp->dccps_service_list = NULL;
-       }
+       kfree(dp->dccps_service_list);
+       dp->dccps_service_list = NULL;
 
        ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
@@ -1305,6 +1121,8 @@ static int dccp_v4_destroy_sock(struct sock *sk)
        return 0;
 }
 
+EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock);
+
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
        kfree(inet_rsk(req)->opt);
@@ -1319,7 +1137,11 @@ static struct request_sock_ops dccp_request_sock_ops = {
        .send_reset     = dccp_v4_ctl_send_reset,
 };
 
-struct proto dccp_v4_prot = {
+static struct timewait_sock_ops dccp_timewait_sock_ops = {
+       .twsk_obj_size  = sizeof(struct inet_timewait_sock),
+};
+
+struct proto dccp_prot = {
        .name                   = "DCCP",
        .owner                  = THIS_MODULE,
        .close                  = dccp_close,
@@ -1333,7 +1155,7 @@ struct proto dccp_v4_prot = {
        .recvmsg                = dccp_recvmsg,
        .backlog_rcv            = dccp_v4_do_rcv,
        .hash                   = dccp_v4_hash,
-       .unhash                 = dccp_v4_unhash,
+       .unhash                 = dccp_unhash,
        .accept                 = inet_csk_accept,
        .get_port               = dccp_v4_get_port,
        .shutdown               = dccp_shutdown,
@@ -1342,5 +1164,7 @@ struct proto dccp_v4_prot = {
        .max_header             = MAX_DCCP_HEADER,
        .obj_size               = sizeof(struct dccp_sock),
        .rsk_prot               = &dccp_request_sock_ops,
-       .twsk_obj_size          = sizeof(struct inet_timewait_sock),
+       .twsk_prot              = &dccp_timewait_sock_ops,
 };
+
+EXPORT_SYMBOL_GPL(dccp_prot);