#include <net/icmp.h>
#include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
#include <net/sock.h>
+#include <net/timewait_sock.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
+#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
- .portalloc_lock = SPIN_LOCK_UNLOCKED,
- .port_rover = 1024 - 1,
};
EXPORT_SYMBOL_GPL(dccp_hashinfo);
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
- return inet_csk_get_port(&dccp_hashinfo, sk, snum);
+ return inet_csk_get_port(&dccp_hashinfo, sk, snum,
+ inet_csk_bind_conflict);
}
static void dccp_v4_hash(struct sock *sk)
inet_hash(&dccp_hashinfo, sk);
}
-static void dccp_v4_unhash(struct sock *sk)
+void dccp_unhash(struct sock *sk)
{
inet_unhash(&dccp_hashinfo, sk);
}
-/* called with local bh disabled */
-static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
- struct inet_timewait_sock **twp)
-{
- struct inet_sock *inet = inet_sk(sk);
- const u32 daddr = inet->rcv_saddr;
- const u32 saddr = inet->daddr;
- const int dif = sk->sk_bound_dev_if;
- INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
- const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
- dccp_hashinfo.ehash_size);
- struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
- const struct sock *sk2;
- const struct hlist_node *node;
- struct inet_timewait_sock *tw;
-
- write_lock(&head->lock);
-
- /* Check TIME-WAIT sockets first. */
- sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
- tw = inet_twsk(sk2);
-
- if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
- goto not_unique;
- }
- tw = NULL;
-
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
- if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
- goto not_unique;
- }
-
- /* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
- inet->num = lport;
- inet->sport = htons(lport);
- sk->sk_hashent = hash;
- BUG_TRAP(sk_unhashed(sk));
- __sk_add_node(sk, &head->chain);
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(&head->lock);
-
- if (twp != NULL) {
- *twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
- } else if (tw != NULL) {
- /* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, &dccp_death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
- inet_twsk_put(tw);
- }
-
- return 0;
-
-not_unique:
- write_unlock(&head->lock);
- return -EADDRNOTAVAIL;
-}
+EXPORT_SYMBOL_GPL(dccp_unhash);
-/*
- * Bind a port for a connect operation and hash it.
- */
-static int dccp_v4_hash_connect(struct sock *sk)
-{
- const unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
-
- if (snum == 0) {
- int rover;
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- struct hlist_node *node;
- struct inet_timewait_sock *tw = NULL;
-
- local_bh_disable();
-
- /* TODO. Actually it is not so bad idea to remove
- * dccp_hashinfo.portalloc_lock before next submission to
- * Linus.
- * As soon as we touch this place at all it is time to think.
- *
- * Now it protects single _advisory_ variable
- * dccp_hashinfo.port_rover, hence it is mostly useless.
- * Code will work nicely if we just delete it, but
- * I am afraid in contented case it will work not better or
- * even worse: another cpu just will hit the same bucket
- * and spin there.
- * So some cpu salt could remove both contention and
- * memory pingpong. Any ideas how to do this in a nice way?
- */
- spin_lock(&dccp_hashinfo.portalloc_lock);
- rover = dccp_hashinfo.port_rover;
-
- do {
- rover++;
- if ((rover < low) || (rover > high))
- rover = low;
- head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
- dccp_hashinfo.bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
- * because the established check is already
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
- if (tb->port == rover) {
- BUG_TRAP(!hlist_empty(&tb->owners));
- if (tb->fastreuse >= 0)
- goto next_port;
- if (!__dccp_v4_check_established(sk,
- rover,
- &tw))
- goto ok;
- goto next_port;
- }
- }
-
- tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
- head, rover);
- if (tb == NULL) {
- spin_unlock(&head->lock);
- break;
- }
- tb->fastreuse = -1;
- goto ok;
-
- next_port:
- spin_unlock(&head->lock);
- } while (--remaining > 0);
- dccp_hashinfo.port_rover = rover;
- spin_unlock(&dccp_hashinfo.portalloc_lock);
-
- local_bh_enable();
-
- return -EADDRNOTAVAIL;
-
-ok:
- /* All locks still held and bhs disabled */
- dccp_hashinfo.port_rover = rover;
- spin_unlock(&dccp_hashinfo.portalloc_lock);
-
- inet_bind_hash(sk, tb, rover);
- if (sk_unhashed(sk)) {
- inet_sk(sk)->sport = htons(rover);
- __inet_hash(&dccp_hashinfo, sk, 0);
- }
- spin_unlock(&head->lock);
-
- if (tw != NULL) {
- inet_twsk_deschedule(tw, &dccp_death_row);
- inet_twsk_put(tw);
- }
-
- ret = 0;
- goto out;
- }
-
- head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
- dccp_hashinfo.bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
- __inet_hash(&dccp_hashinfo, sk, 0);
- spin_unlock_bh(&head->lock);
- return 0;
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
- ret = __dccp_v4_check_established(sk, snum, NULL);
-out:
- local_bh_enable();
- return ret;
- }
-}
-
-static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len)
+int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_CLIENT;
+ if (dccp_service_not_initialized(sk))
+ return -EPROTO;
+
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
inet->dport = usin->sin_port;
inet->daddr = daddr;
- dp->dccps_ext_header_len = 0;
+ inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet->opt != NULL)
- dp->dccps_ext_header_len = inet->opt->optlen;
+ inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
/*
* Socket identity is still unknown (sport may be zero).
* However we set state to DCCP_REQUESTING and not releasing socket
* complete initialization after this.
*/
dccp_set_state(sk, DCCP_REQUESTING);
- err = dccp_v4_hash_connect(sk);
+ err = inet_hash_connect(&dccp_death_row, sk);
if (err != 0)
goto failure;
- err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+ err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport,
+ sk);
if (err != 0)
goto failure;
goto out;
}
+EXPORT_SYMBOL_GPL(dccp_v4_connect);
+
/*
* This routine does path mtu discovery as defined in RFC1191.
*/
mtu = dst_mtu(dst);
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
- dp->dccps_pmtu_cookie > mtu) {
+ inet_csk(sk)->icsk_pmtu_cookie > mtu) {
dccp_sync_mss(sk, mtu);
/*
if (skb != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
ireq->rmt_addr,
ireq->opt);
sock_put(sk);
}
+/* This routine computes an IPv4 DCCP checksum. */
+void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ struct dccp_hdr *dh = dccp_hdr(skb);
+
+ dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_send_check);
+
int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
struct sk_buff *skb;
skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
if (skb != NULL) {
- const struct dccp_sock *dp = dccp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
err = ip_build_and_send_pkt(skb, sk,
inet->saddr, inet->daddr, NULL);
if (err == NET_XMIT_CN)
err = 0;
-
- ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
return err;
struct dccp_request_sock *dreq;
const __u32 saddr = skb->nh.iph->saddr;
const __u32 daddr = skb->nh.iph->daddr;
- struct dst_entry *dst = NULL;
+ const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST))
+ (RTCF_BROADCAST | RTCF_MULTICAST)) {
+ reset_code = DCCP_RESET_CODE_NO_CONNECTION;
goto drop;
+ }
+ if (dccp_bad_service_code(sk, service)) {
+ reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+ goto drop;
+ }
/*
* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
ireq = inet_rsk(req);
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
- /* FIXME: Merge Aristeu's option parsing code when ready */
req->rcv_wnd = 100; /* Fake, option parsing will get the
right value */
ireq->opt = NULL;
* dccp_create_openreq_child.
*/
dreq = dccp_rsk(req);
- dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
- dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
- dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+ dreq->dreq_isr = dcb->dccpd_seq;
+ dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
+ dreq->dreq_service = service;
- if (dccp_v4_send_response(sk, req, dst))
+ if (dccp_v4_send_response(sk, req, NULL))
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
return 0;
drop_and_free:
- /*
- * FIXME: should be reqsk_free after implementing req->rsk_ops
- */
- __reqsk_free(req);
+ reqsk_free(req);
drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ dcb->dccpd_reset_code = reset_code;
return -1;
}
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
/*
* The three way handshake has completed - we got a valid ACK or DATAACK -
* now create the new socket.
return NULL;
}
+EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
+
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
return 0;
reset:
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
dccp_v4_ctl_send_reset(skb);
discard:
kfree_skb(skb);
return 0;
}
-static inline int dccp_invalid_packet(struct sk_buff *skb)
+EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
+
+int dccp_invalid_packet(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
return 1;
}
- /* If the header checksum is incorrect, drop packet and return */
- if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
- skb->nh.iph->daddr) < 0) {
- LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
- "incorrect\n");
- return 1;
- }
-
return 0;
}
+EXPORT_SYMBOL_GPL(dccp_invalid_packet);
+
/* this is called when real data arrives */
int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
struct sock *sk;
- int rc;
/* Step 1: Check header basics: */
if (dccp_invalid_packet(skb))
goto discard_it;
+ /* If the header checksum is incorrect, drop packet and return */
+ if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
+ skb->nh.iph->daddr) < 0) {
+ LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
+ __FUNCTION__);
+ goto discard_it;
+ }
+
dh = dccp_hdr(skb);
-#if 0
- /*
- * Use something like this to simulate some DATA/DATAACK loss to test
- * dccp_ackpkts_add, you'll get something like this on a session that
- * sends 10 DATA/DATAACK packets:
- *
- * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
- *
- * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
- * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
- * with the same state
- * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
- *
- * So...
- *
- * 281473596467422 was received
- * 281473596467421 was not received
- * 281473596467420 was received
- * 281473596467419 was not received
- * 281473596467418 was received
- * 281473596467417 was not received
- * 281473596467416 was received
- * 281473596467415 was not received
- * 281473596467414 was received
- * 281473596467413 was received (this one was the 3way handshake
- * RESPONSE)
- *
- */
- if (dh->dccph_type == DCCP_PKT_DATA ||
- dh->dccph_type == DCCP_PKT_DATAACK) {
- static int discard = 0;
- if (discard) {
- discard = 0;
- goto discard_it;
- }
- discard = 1;
- }
-#endif
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
goto do_time_wait;
}
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
- dccp_pr_debug("xfrm4_policy_check failed\n");
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- }
-
- if (sk_filter(sk, skb, 0)) {
- dccp_pr_debug("sk_filter failed\n");
- goto discard_and_relse;
- }
-
- skb->dev = NULL;
+ nf_reset(skb);
- bh_lock_sock(sk);
- rc = 0;
- if (!sock_owned_by_user(sk))
- rc = dccp_v4_do_rcv(sk, skb);
- else
- sk_add_backlog(sk, skb);
- bh_unlock_sock(sk);
-
- sock_put(sk);
- return rc;
+ return sk_receive_skb(sk, skb);
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto no_dccp_socket;
}
-static int dccp_v4_init_sock(struct sock *sk)
+struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
+ .queue_xmit = ip_queue_xmit,
+ .send_check = dccp_v4_send_check,
+ .rebuild_header = inet_sk_rebuild_header,
+ .conn_request = dccp_v4_conn_request,
+ .syn_recv_sock = dccp_v4_request_recv_sock,
+ .net_header_len = sizeof(struct iphdr),
+ .setsockopt = ip_setsockopt,
+ .getsockopt = ip_getsockopt,
+ .addr2sockaddr = inet_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in),
+};
+
+int dccp_v4_init_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
static int dccp_ctl_socket_init = 1;
dccp_options_init(&dp->dccps_options);
+ do_gettimeofday(&dp->dccps_epoch);
if (dp->dccps_options.dccpo_send_ack_vector) {
- dp->dccps_hc_rx_ackpkts =
- dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
- GFP_KERNEL);
-
- if (dp->dccps_hc_rx_ackpkts == NULL)
+ dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
+ if (dp->dccps_hc_rx_ackvec == NULL)
return -ENOMEM;
}
* setsockopt(CCIDs-I-want/accept). -acme
*/
if (likely(!dccp_ctl_socket_init)) {
- dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+ dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid,
sk);
- dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+ dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid,
sk);
if (dp->dccps_hc_rx_ccid == NULL ||
dp->dccps_hc_tx_ccid == NULL) {
ccid_exit(dp->dccps_hc_rx_ccid, sk);
ccid_exit(dp->dccps_hc_tx_ccid, sk);
- dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
- dp->dccps_hc_rx_ackpkts = NULL;
+ if (dp->dccps_options.dccpo_send_ack_vector) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
return -ENOMEM;
}
dccp_ctl_socket_init = 0;
dccp_init_xmit_timers(sk);
- inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+ icsk->icsk_rto = DCCP_TIMEOUT_INIT;
sk->sk_state = DCCP_CLOSED;
+ sk->sk_write_space = dccp_write_space;
+ icsk->icsk_af_ops = &dccp_ipv4_af_ops;
+ icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
+ dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
return 0;
}
-static int dccp_v4_destroy_sock(struct sock *sk)
+EXPORT_SYMBOL_GPL(dccp_v4_init_sock);
+
+int dccp_v4_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
/*
- * DCCP doesn't use sk_qrite_queue, just sk_send_head
+ * DCCP doesn't use sk_write_queue, just sk_send_head
* for retransmissions
*/
if (sk->sk_send_head != NULL) {
if (inet_csk(sk)->icsk_bind_hash != NULL)
inet_put_port(&dccp_hashinfo, sk);
- dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
- dp->dccps_hc_rx_ackpkts = NULL;
+ kfree(dp->dccps_service_list);
+ dp->dccps_service_list = NULL;
+
+ ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+ if (dp->dccps_options.dccpo_send_ack_vector) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
ccid_exit(dp->dccps_hc_rx_ccid, sk);
ccid_exit(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
return 0;
}
+EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock);
+
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->opt);
.send_reset = dccp_v4_ctl_send_reset,
};
-struct proto dccp_v4_prot = {
+static struct timewait_sock_ops dccp_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct inet_timewait_sock),
+};
+
+struct proto dccp_prot = {
.name = "DCCP",
.owner = THIS_MODULE,
.close = dccp_close,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v4_do_rcv,
.hash = dccp_v4_hash,
- .unhash = dccp_v4_unhash,
+ .unhash = dccp_unhash,
.accept = inet_csk_accept,
.get_port = dccp_v4_get_port,
.shutdown = dccp_shutdown,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
.rsk_prot = &dccp_request_sock_ops,
- .twsk_obj_size = sizeof(struct inet_timewait_sock),
+ .twsk_prot = &dccp_timewait_sock_ops,
};
+
+EXPORT_SYMBOL_GPL(dccp_prot);