4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
/*
 * File-scope DCCP state. The SNMP statistics table, the orphaned-socket
 * counter and the inet hash tables are exported GPL-only; sysctl_dccp_tx_qlen
 * is the transmit queue limit consulted by dccp_sendmsg().
 */
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41 EXPORT_SYMBOL_GPL(dccp_statistics);
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 .lhash_lock = RW_LOCK_UNLOCKED,
49 .lhash_users = ATOMIC_INIT(0),
50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state - handle a transition of @sk from its current sk_state
 * towards @state.
 *
 * Logs the transition with dccp_pr_debug() and warns when @state equals the
 * old state. The visible statements bump DCCP_MIB_CURRESTAB when the old
 * state was not DCCP_OPEN, bump DCCP_MIB_ESTABRESETS when leaving OPEN,
 * ACTIVE_CLOSEREQ or CLOSING, call sk->sk_prot->unhash(), and decrement
 * DCCP_MIB_CURRESTAB when the old state was DCCP_OPEN.
 *
 * NOTE(review): the embedded line numbers have gaps here (the branches that
 * select these actions and the statement that stores @state are not visible);
 * confirm against the complete file.
 */
58 void dccp_set_state(struct sock *sk, const int state)
60 const int oldstate = sk->sk_state;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
63 dccp_state_name(oldstate), dccp_state_name(state));
64 WARN_ON(state == oldstate);
68 if (oldstate != DCCP_OPEN)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
73 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74 oldstate == DCCP_CLOSING)
75 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77 sk->sk_prot->unhash(sk);
78 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
83 if (oldstate == DCCP_OPEN)
84 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
87 /* Change state AFTER socket is unhashed to avoid closed
88 * socket sitting in hash tables.
93 EXPORT_SYMBOL_GPL(dccp_set_state);
/*
 * dccp_finish_passive_close - complete a close that the peer initiated.
 *
 * Dispatches on sk->sk_state:
 *   DCCP_PASSIVE_CLOSE    - send a Reset with code DCCP_RESET_CODE_CLOSED and
 *                           move to DCCP_CLOSED;
 *   DCCP_PASSIVE_CLOSEREQ - send a Close with the active flag set and move
 *                           to DCCP_CLOSING.
 * No action for other states is visible in this extract.
 */
95 static void dccp_finish_passive_close(struct sock *sk)
97 switch (sk->sk_state) {
98 case DCCP_PASSIVE_CLOSE:
99 /* Node (client or server) has received Close packet. */
100 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101 dccp_set_state(sk, DCCP_CLOSED);
103 case DCCP_PASSIVE_CLOSEREQ:
105 * Client received CloseReq. We set the `active' flag so that
106 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 dccp_send_close(sk, 1);
109 dccp_set_state(sk, DCCP_CLOSING);
/*
 * dccp_done - finish off a connection.
 *
 * Sets the state to DCCP_CLOSED, clears the transmit timers and marks both
 * directions as shut down (SHUTDOWN_MASK). If the socket is not flagged
 * SOCK_DEAD its sk_state_change() callback is invoked.
 * NOTE(review): the line between that call and inet_csk_destroy_sock() is
 * missing from this extract, so the condition under which the socket is
 * destroyed cannot be read off here; confirm against the complete file.
 */
113 void dccp_done(struct sock *sk)
115 dccp_set_state(sk, DCCP_CLOSED);
116 dccp_clear_xmit_timers(sk);
118 sk->sk_shutdown = SHUTDOWN_MASK;
120 if (!sock_flag(sk, SOCK_DEAD))
121 sk->sk_state_change(sk);
123 inet_csk_destroy_sock(sk);
126 EXPORT_SYMBOL_GPL(dccp_done);
/*
 * dccp_packet_name - return a printable name for DCCP packet type @type.
 *
 * The name comes from a static table indexed by the DCCP_PKT_* constants.
 * Values >= DCCP_NR_PKT_TYPES are filtered out before the table lookup; the
 * value returned for them is not visible in this extract.
 * NOTE(review): only the upper bound is tested; a negative @type is not
 * rejected by the visible check.
 */
128 const char *dccp_packet_name(const int type)
130 static const char *dccp_packet_names[] = {
131 [DCCP_PKT_REQUEST] = "REQUEST",
132 [DCCP_PKT_RESPONSE] = "RESPONSE",
133 [DCCP_PKT_DATA] = "DATA",
134 [DCCP_PKT_ACK] = "ACK",
135 [DCCP_PKT_DATAACK] = "DATAACK",
136 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137 [DCCP_PKT_CLOSE] = "CLOSE",
138 [DCCP_PKT_RESET] = "RESET",
139 [DCCP_PKT_SYNC] = "SYNC",
140 [DCCP_PKT_SYNCACK] = "SYNCACK",
143 if (type >= DCCP_NR_PKT_TYPES)
146 return dccp_packet_names[type];
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
/*
 * dccp_state_name - return a printable name for socket state @state.
 *
 * Looks @state up in a static table indexed by the DCCP_* state constants.
 * Returns "INVALID STATE!" when @state >= DCCP_MAX_STATES.
 * NOTE(review): only the upper bound is tested; a negative @state is not
 * rejected by the visible check.
 */
151 const char *dccp_state_name(const int state)
153 static char *dccp_state_names[] = {
154 [DCCP_OPEN] = "OPEN",
155 [DCCP_REQUESTING] = "REQUESTING",
156 [DCCP_PARTOPEN] = "PARTOPEN",
157 [DCCP_LISTEN] = "LISTEN",
158 [DCCP_RESPOND] = "RESPOND",
159 [DCCP_CLOSING] = "CLOSING",
160 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
161 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
162 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163 [DCCP_TIME_WAIT] = "TIME_WAIT",
164 [DCCP_CLOSED] = "CLOSED",
167 if (state >= DCCP_MAX_STATES)
168 return "INVALID STATE!";
170 return dccp_state_names[state];
173 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_init_sock - set up the DCCP-specific part of a new socket.
 *
 * Initialises the minisock, the retransmission timeout and request retry
 * count, the initial state (DCCP_CLOSED), the write-space and sync-MSS
 * callbacks, default values for the MSS cache (536), role, service code and
 * ack ratios, the transmit timers and the feature-negotiation list.
 *
 * When @ctl_sock_initialized is non-zero, dccp_feat_init() is called, an ack
 * vector is allocated if dccpms_send_ack_vector is set, and RX/TX CCID
 * blocks are created. If either CCID allocation fails, both CCIDs and the
 * ack vector are released and the pointers are reset to NULL. Otherwise
 * (control socket) only the pending/confirm lists are initialised.
 *
 * NOTE(review): the return statements and error codes are not visible in
 * this extract; confirm against the complete file.
 */
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
177 struct dccp_sock *dp = dccp_sk(sk);
178 struct dccp_minisock *dmsk = dccp_msk(sk);
179 struct inet_connection_sock *icsk = inet_csk(sk);
181 dccp_minisock_init(&dp->dccps_minisock);
183 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
184 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
185 sk->sk_state = DCCP_CLOSED;
186 sk->sk_write_space = dccp_write_space;
187 icsk->icsk_sync_mss = dccp_sync_mss;
188 dp->dccps_mss_cache = 536;
189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
194 dccp_init_xmit_timers(sk);
196 INIT_LIST_HEAD(&dp->dccps_featneg);
198 * FIXME: We're hardcoding the CCID, and doing this at this point makes
199 * the listening (master) sock get CCID control blocks, which is not
200 * necessary, but for now, to not mess with the test userspace apps,
201 * lets leave it here, later the real solution is to do this in a
202 * setsockopt(CCIDs-I-want/accept). -acme
204 if (likely(ctl_sock_initialized)) {
205 int rc = dccp_feat_init(sk);
210 if (dmsk->dccpms_send_ack_vector) {
211 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212 if (dp->dccps_hc_rx_ackvec == NULL)
215 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
217 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
219 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220 dp->dccps_hc_tx_ccid == NULL)) {
221 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223 if (dmsk->dccpms_send_ack_vector) {
224 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225 dp->dccps_hc_rx_ackvec = NULL;
227 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
231 /* control socket doesn't need feat nego */
232 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233 INIT_LIST_HEAD(&dmsk->dccpms_conf);
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release the DCCP-specific resources held by @sk.
 *
 * Frees the pending retransmission skb (sk_send_head), the service list,
 * the ack vector (when dccpms_send_ack_vector is set), both CCID blocks and
 * the feature-negotiation list, resetting each pointer to NULL.
 * NOTE(review): the statement executed when icsk_bind_hash is non-NULL is
 * not visible in this extract.
 */
241 void dccp_destroy_sock(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct dccp_minisock *dmsk = dccp_msk(sk);
247 * DCCP doesn't use sk_write_queue, just sk_send_head
248 * for retransmissions
250 if (sk->sk_send_head != NULL) {
251 kfree_skb(sk->sk_send_head);
252 sk->sk_send_head = NULL;
255 /* Clean up a referenced DCCP bind bucket. */
256 if (inet_csk(sk)->icsk_bind_hash != NULL)
259 kfree(dp->dccps_service_list);
260 dp->dccps_service_list = NULL;
262 if (dmsk->dccpms_send_ack_vector) {
263 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264 dp->dccps_hc_rx_ackvec = NULL;
266 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
270 /* clean up feature negotiation state */
271 dccp_feat_list_purge(&dp->dccps_featneg);
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start - put @sk into the listening role.
 *
 * Marks the socket as DCCP_ROLE_LISTEN, finalises its feature-negotiation
 * settings and then returns the result of inet_csk_listen_start() with
 * @backlog. The value returned when dccp_feat_finalise_settings() fails is
 * not visible in this extract.
 */
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
278 struct dccp_sock *dp = dccp_sk(sk);
280 dp->dccps_role = DCCP_ROLE_LISTEN;
281 /* do not start to listen if feature negotiation setup fails */
282 if (dccp_feat_finalise_settings(dp))
284 return inet_csk_listen_start(sk, backlog);
/*
 * dccp_need_reset - non-zero unless @state is DCCP_CLOSED, DCCP_LISTEN or
 * DCCP_REQUESTING. dccp_disconnect() uses it to decide whether to send a
 * Reset.
 */
287 static inline int dccp_need_reset(int state)
289 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290 state != DCCP_REQUESTING;
/*
 * dccp_disconnect - abort the connection on @sk and return it to a reusable
 * closed state.
 *
 * Moves the socket to DCCP_CLOSED if it is not already there, then:
 *   - a listening socket has its listen state torn down;
 *   - a state for which dccp_need_reset() is true gets a Reset with code
 *     DCCP_RESET_CODE_ABORTED and sk_err = ECONNRESET;
 *   - DCCP_REQUESTING only sets sk_err = ECONNRESET.
 * Afterwards the transmit timers are cleared, the receive and write queues
 * and sk_send_head are purged, the source address is reset unless
 * SOCK_BINDADDR_LOCK is set, SOCK_DONE and the backoff counter are cleared,
 * delayed-ack state is reinitialised and sk_error_report() is called.
 *
 * NOTE(review): @flags is not referenced by any visible line, and the return
 * statement is missing from this extract.
 */
293 int dccp_disconnect(struct sock *sk, int flags)
295 struct inet_connection_sock *icsk = inet_csk(sk);
296 struct inet_sock *inet = inet_sk(sk);
298 const int old_state = sk->sk_state;
300 if (old_state != DCCP_CLOSED)
301 dccp_set_state(sk, DCCP_CLOSED);
304 * This corresponds to the ABORT function of RFC793, sec. 3.8
305 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
307 if (old_state == DCCP_LISTEN) {
308 inet_csk_listen_stop(sk);
309 } else if (dccp_need_reset(old_state)) {
310 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
311 sk->sk_err = ECONNRESET;
312 } else if (old_state == DCCP_REQUESTING)
313 sk->sk_err = ECONNRESET;
315 dccp_clear_xmit_timers(sk);
317 __skb_queue_purge(&sk->sk_receive_queue);
318 __skb_queue_purge(&sk->sk_write_queue);
319 if (sk->sk_send_head != NULL) {
320 __kfree_skb(sk->sk_send_head);
321 sk->sk_send_head = NULL;
326 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
327 inet_reset_saddr(sk);
330 sock_reset_flag(sk, SOCK_DONE);
332 icsk->icsk_backoff = 0;
333 inet_csk_delack_init(sk);
336 WARN_ON(inet->num && !icsk->icsk_bind_hash);
338 sk->sk_error_report(sk);
342 EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 * dccp_poll - poll handler for DCCP sockets.
 *
 * Registers the caller on sk->sk_sleep, and for a listening socket returns
 * inet_csk_listen_poll() directly. Otherwise the visible logic:
 *   - reports POLLIN | POLLRDNORM | POLLRDHUP when receive is shut down;
 *   - outside the REQUESTING/RESPOND states, reports readable when
 *     sk_rmem_alloc > 0 and, unless send is shut down, writable when
 *     sk_stream_wspace() >= sk_stream_min_wspace(); if there is no space the
 *     SOCK_ASYNC_NOSPACE and SOCK_NOSPACE bits are set and the space test is
 *     repeated to close the race described below.
 * NOTE(review): the mask declaration, the action taken for a fully shut-down
 * or closed socket, and the return statement are not visible in this extract.
 */
345 * Wait for a DCCP event.
347 * Note that we don't need to lock the socket, as the upper poll layers
348 * take care of normal races (between the test and the event) and we don't
349 * go look at any of the socket buffers directly.
351 unsigned int dccp_poll(struct file *file, struct socket *sock,
355 struct sock *sk = sock->sk;
357 poll_wait(file, sk->sk_sleep, wait);
358 if (sk->sk_state == DCCP_LISTEN)
359 return inet_csk_listen_poll(sk);
361 /* Socket is not locked. We are protected from async events
362 by poll logic and correct handling of state changes
363 made by other threads is impossible in any case.
370 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
372 if (sk->sk_shutdown & RCV_SHUTDOWN)
373 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
376 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
377 if (atomic_read(&sk->sk_rmem_alloc) > 0)
378 mask |= POLLIN | POLLRDNORM;
380 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
381 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
382 mask |= POLLOUT | POLLWRNORM;
383 } else { /* send SIGIO later */
384 set_bit(SOCK_ASYNC_NOSPACE,
385 &sk->sk_socket->flags);
386 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
388 /* Race breaker. If space is freed after
389 * wspace test but before the flags are set,
390 * IO signal will be lost.
392 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
393 mask |= POLLOUT | POLLWRNORM;
400 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - DCCP socket ioctl handler.
 *
 * The visible lines test for a listening socket, peek at the head of the
 * receive queue and copy an "amount" value to user space at @arg as an int.
 * Per the comment below, only the size of the next packet is reported.
 * NOTE(review): the command dispatch on @cmd, the locking, the computation
 * of amount and the return paths are not visible in this extract.
 */
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
408 if (sk->sk_state == DCCP_LISTEN)
414 unsigned long amount = 0;
416 skb = skb_peek(&sk->sk_receive_queue);
419 * We will only return the amount of this packet since
420 * that is all that will be read.
424 rc = put_user(amount, (int __user *)arg);
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - install the service code (and optional list of
 * additional service codes) for @sk.
 *
 * @service: primary service code, already read from the start of @optval.
 * @optval:  user buffer; any u32 values after the first form the list.
 * @optlen:  total length of @optval in bytes.
 *
 * DCCP_SERVICE_INVALID_VALUE and an @optlen above
 * DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32) are checked up front. When
 * @optlen exceeds sizeof(service) a list of optlen / sizeof(u32) - 1
 * entries is allocated and copied from user space, and is also checked for
 * DCCP_SERVICE_INVALID_VALUE. The old list is freed and replaced.
 * NOTE(review): the error codes, allocation-failure handling, locking and
 * return statements are not visible in this extract.
 */
438 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
439 char __user *optval, int optlen)
441 struct dccp_sock *dp = dccp_sk(sk);
442 struct dccp_service_list *sl = NULL;
444 if (service == DCCP_SERVICE_INVALID_VALUE ||
445 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
448 if (optlen > sizeof(service)) {
449 sl = kmalloc(optlen, GFP_KERNEL);
453 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
454 if (copy_from_user(sl->dccpsl_list,
455 optval + sizeof(service),
456 optlen - sizeof(service)) ||
457 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
464 dp->dccps_service = service;
466 kfree(dp->dccps_service_list);
468 dp->dccps_service_list = sl;
/*
 * dccp_setsockopt_cscov - set the minimum checksum coverage for @sk.
 *
 * @cscov: requested coverage; values outside 0..15 are rejected.
 * @rx:    true for the receive side, false for the send side.
 *
 * Builds a list of permissible values and registers it for negotiation of
 * DCCPF_MIN_CSUM_COVER via dccp_feat_register_sp(); @cscov is then stored in
 * dccps_pcrlen or dccps_pcslen.
 * NOTE(review): the computation of len, the list contents, the conditions
 * guarding the two assignments and the return paths are not visible in this
 * extract.
 */
473 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
478 if (cscov < 0 || cscov > 15)
481 * Populate a list of permissible values, in the range cscov...15. This
482 * is necessary since feature negotiation of single values only works if
483 * both sides incidentally choose the same value. Since the list starts
484 * lowest-value first, negotiation will pick the smallest shared value.
490 list = kmalloc(len, GFP_KERNEL);
494 for (i = 0; i < len; i++)
497 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
501 dccp_sk(sk)->dccps_pcrlen = cscov;
503 dccp_sk(sk)->dccps_pcslen = cscov;
/*
 * do_dccp_setsockopt - handle SOL_DCCP socket options.
 *
 * DCCP_SOCKOPT_PACKET_SIZE and DCCP_SOCKOPT_CHANGE_L/R only emit a
 * deprecation warning. For the remaining options @optlen must be at least
 * sizeof(int) and an int is read from @optval:
 *   DCCP_SOCKOPT_SERVICE         - handed to dccp_setsockopt_service();
 *   DCCP_SOCKOPT_SERVER_TIMEWAIT - stores (val != 0) after a check of the
 *                                  socket role against DCCP_ROLE_SERVER;
 *   DCCP_SOCKOPT_SEND_CSCOV /
 *   DCCP_SOCKOPT_RECV_CSCOV      - handed to dccp_setsockopt_cscov().
 * NOTE(review): error codes, locking and the return statement are not
 * visible in this extract.
 */
509 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
510 char __user *optval, int optlen)
512 struct dccp_sock *dp = dccp_sk(sk);
516 case DCCP_SOCKOPT_PACKET_SIZE:
517 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
519 case DCCP_SOCKOPT_CHANGE_L:
520 case DCCP_SOCKOPT_CHANGE_R:
521 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
525 if (optlen < (int)sizeof(int))
528 if (get_user(val, (int __user *)optval))
531 if (optname == DCCP_SOCKOPT_SERVICE)
532 return dccp_setsockopt_service(sk, val, optval, optlen);
536 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
537 if (dp->dccps_role != DCCP_ROLE_SERVER)
540 dp->dccps_server_timewait = (val != 0);
542 case DCCP_SOCKOPT_SEND_CSCOV:
543 err = dccp_setsockopt_cscov(sk, val, false);
545 case DCCP_SOCKOPT_RECV_CSCOV:
546 err = dccp_setsockopt_cscov(sk, val, true);
/*
 * dccp_setsockopt - setsockopt entry point.
 *
 * Options at a level other than SOL_DCCP are forwarded to the address
 * family's setsockopt handler; SOL_DCCP options go to do_dccp_setsockopt().
 */
557 int dccp_setsockopt(struct sock *sk, int level, int optname,
558 char __user *optval, int optlen)
560 if (level != SOL_DCCP)
561 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
564 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
567 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * compat_dccp_setsockopt - compat variant of the setsockopt entry point.
 *
 * Options at a level other than SOL_DCCP are forwarded to
 * inet_csk_compat_setsockopt(); SOL_DCCP options go to do_dccp_setsockopt().
 */
570 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
571 char __user *optval, int optlen)
573 if (level != SOL_DCCP)
574 return inet_csk_compat_setsockopt(sk, level, optname,
576 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
579 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service - report the service code(s) of @sk to user space.
 *
 * Writes the total length to @optlen, the primary service code to
 * @optval[0] and, when a service list exists, its dccpsl_nr entries
 * starting at @optval + 1.
 * NOTE(review): the check of the caller's @len against the total length,
 * the locking and the return paths are not visible in this extract; err
 * starts out as -ENOENT.
 */
582 static int dccp_getsockopt_service(struct sock *sk, int len,
583 __be32 __user *optval,
586 const struct dccp_sock *dp = dccp_sk(sk);
587 const struct dccp_service_list *sl;
588 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
591 if ((sl = dp->dccps_service_list) != NULL) {
592 slen = sl->dccpsl_nr * sizeof(u32);
601 if (put_user(total_len, optlen) ||
602 put_user(dp->dccps_service, optval) ||
603 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt - handle SOL_DCCP getsockopt requests.
 *
 * Reads the caller's buffer length from @optlen and checks it against
 * sizeof(int). Visible option handling:
 *   DCCP_SOCKOPT_PACKET_SIZE      - deprecation warning only;
 *   DCCP_SOCKOPT_SERVICE          - dccp_getsockopt_service();
 *   DCCP_SOCKOPT_GET_CUR_MPS      - cached MSS;
 *   DCCP_SOCKOPT_AVAILABLE_CCIDS  - ccid_getsockopt_builtin_ccids();
 *   DCCP_SOCKOPT_SERVER_TIMEWAIT  - dccps_server_timewait;
 *   DCCP_SOCKOPT_SEND_CSCOV / DCCP_SOCKOPT_RECV_CSCOV
 *                                 - dccps_pcslen / dccps_pcrlen;
 *   others are passed to the RX or TX CCID getsockopt hooks.
 * Integer results are written back through @optlen and @optval.
 * NOTE(review): the option ranges selecting the CCID hooks, the value
 * assigned to len before the final copy, and the error codes are not
 * visible in this extract.
 */
610 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
611 char __user *optval, int __user *optlen)
613 struct dccp_sock *dp;
616 if (get_user(len, optlen))
619 if (len < (int)sizeof(int))
625 case DCCP_SOCKOPT_PACKET_SIZE:
626 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
628 case DCCP_SOCKOPT_SERVICE:
629 return dccp_getsockopt_service(sk, len,
630 (__be32 __user *)optval, optlen);
631 case DCCP_SOCKOPT_GET_CUR_MPS:
632 val = dp->dccps_mss_cache;
634 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
635 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
636 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
637 val = dp->dccps_server_timewait;
639 case DCCP_SOCKOPT_SEND_CSCOV:
640 val = dp->dccps_pcslen;
642 case DCCP_SOCKOPT_RECV_CSCOV:
643 val = dp->dccps_pcrlen;
646 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
647 len, (u32 __user *)optval, optlen);
649 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
650 len, (u32 __user *)optval, optlen);
656 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
/*
 * dccp_getsockopt - getsockopt entry point.
 *
 * Options at a level other than SOL_DCCP are forwarded to the address
 * family's getsockopt handler; SOL_DCCP options go to do_dccp_getsockopt().
 */
662 int dccp_getsockopt(struct sock *sk, int level, int optname,
663 char __user *optval, int __user *optlen)
665 if (level != SOL_DCCP)
666 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
669 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
672 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * compat_dccp_getsockopt - compat variant of the getsockopt entry point.
 *
 * Options at a level other than SOL_DCCP are forwarded to
 * inet_csk_compat_getsockopt(); SOL_DCCP options go to do_dccp_getsockopt().
 */
675 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
676 char __user *optval, int __user *optlen)
678 if (level != SOL_DCCP)
679 return inet_csk_compat_getsockopt(sk, level, optname,
681 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
684 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_sendmsg - queue one datagram from user space for transmission.
 *
 * Visible steps: the payload length is compared with the cached MSS and the
 * write-queue length with sysctl_dccp_tx_qlen (0 disables that limit); the
 * send timeout is derived from MSG_DONTWAIT; unless the socket is in OPEN or
 * PARTOPEN the call waits for the connection via sk_stream_wait_connect();
 * an skb with sk_prot->max_header bytes of headroom is allocated, filled
 * from msg->msg_iov, appended to sk_write_queue and dccp_write_xmit() is
 * invoked.
 * NOTE(review): the error codes for the two limit checks, the locking and
 * the cleanup/return paths are not visible in this extract.
 */
687 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
690 const struct dccp_sock *dp = dccp_sk(sk);
691 const int flags = msg->msg_flags;
692 const int noblock = flags & MSG_DONTWAIT;
697 if (len > dp->dccps_mss_cache)
702 if (sysctl_dccp_tx_qlen &&
703 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
708 timeo = sock_sndtimeo(sk, noblock);
711 * We have to use sk_stream_wait_connect here to set sk_write_pending,
712 * so that the trick in dccp_rcv_request_sent_state_process.
714 /* Wait for a connection to finish. */
715 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
716 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
719 size = sk->sk_prot->max_header + len;
721 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
726 skb_reserve(skb, sk->sk_prot->max_header);
727 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
731 skb_queue_tail(&sk->sk_write_queue, skb);
732 dccp_write_xmit(sk,0);
741 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one datagram into the user's iovec.
 *
 * Visible behaviour: a listening socket is handled separately; the head of
 * sk_receive_queue is peeked and dispatched on its DCCP packet type. A
 * CloseReq (and any case label missing just above it) triggers
 * dccp_finish_passive_close() unless MSG_PEEK is set; other packets are
 * logged and dropped with sk_eat_skb(). When there is nothing to deliver the
 * code checks SOCK_DONE, a pending socket error, RCV_SHUTDOWN, the CLOSED
 * state and pending signals before sleeping in sk_wait_data(). When the
 * caller's buffer is smaller than the packet, MSG_TRUNC is set; the data is
 * copied with skb_copy_datagram_iovec() and the skb is consumed unless
 * MSG_PEEK is set.
 * NOTE(review): many lines (loop structure, case labels, error codes,
 * locking, return) are not visible in this extract.
 */
743 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
744 size_t len, int nonblock, int flags, int *addr_len)
746 const struct dccp_hdr *dh;
751 if (sk->sk_state == DCCP_LISTEN) {
756 timeo = sock_rcvtimeo(sk, nonblock);
759 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
762 goto verify_sock_status;
766 switch (dh->dccph_type) {
768 case DCCP_PKT_DATAACK:
772 case DCCP_PKT_CLOSEREQ:
773 if (!(flags & MSG_PEEK))
774 dccp_finish_passive_close(sk);
777 dccp_pr_debug("found fin (%s) ok!\n",
778 dccp_packet_name(dh->dccph_type));
782 dccp_pr_debug("packet_type=%s\n",
783 dccp_packet_name(dh->dccph_type));
784 sk_eat_skb(sk, skb, 0);
787 if (sock_flag(sk, SOCK_DONE)) {
793 len = sock_error(sk);
797 if (sk->sk_shutdown & RCV_SHUTDOWN) {
802 if (sk->sk_state == DCCP_CLOSED) {
803 if (!sock_flag(sk, SOCK_DONE)) {
804 /* This occurs when user tries to read
805 * from never connected socket.
819 if (signal_pending(current)) {
820 len = sock_intr_errno(timeo);
824 sk_wait_data(sk, &timeo);
829 else if (len < skb->len)
830 msg->msg_flags |= MSG_TRUNC;
832 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
833 /* Exception. Bailout! */
838 if (!(flags & MSG_PEEK))
839 sk_eat_skb(sk, skb, 0);
847 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen() handler for DCCP sockets.
 *
 * Checks that the socket is unconnected (SS_UNCONNECTED), of type SOCK_DCCP
 * and currently in the CLOSED or LISTEN state. A socket not yet listening
 * is started with dccp_listen_start(); in the visible code
 * sk_max_ack_backlog is then set to @backlog.
 * NOTE(review): error codes, locking and the return statement are not
 * visible in this extract.
 */
849 int inet_dccp_listen(struct socket *sock, int backlog)
851 struct sock *sk = sock->sk;
852 unsigned char old_state;
858 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
861 old_state = sk->sk_state;
862 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
865 /* Really, if the socket is already in listen state
866 * we can only allow the backlog to be adjusted.
868 if (old_state != DCCP_LISTEN) {
870 * FIXME: here it probably should be sk->sk_prot->listen_start
871 * see tcp_listen_start
873 err = dccp_listen_start(sk, backlog);
877 sk->sk_max_ack_backlog = backlog;
885 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection - start an orderly close of @sk, based on its
 * current state.
 *
 * The passive-close states are completed via dccp_finish_passive_close().
 * In the other visible path the delayed-ack timer (used as the PARTOPEN
 * timer, per the debug message) is cleared, a Close is sent with the active
 * flag set, and the next state is DCCP_ACTIVE_CLOSEREQ for a server without
 * dccps_server_timewait or DCCP_CLOSING in the remaining case, applied with
 * dccp_set_state().
 * NOTE(review): the case labels and break/fall-through structure for the
 * non-passive states are not visible in this extract.
 */
887 static void dccp_terminate_connection(struct sock *sk)
889 u8 next_state = DCCP_CLOSED;
891 switch (sk->sk_state) {
892 case DCCP_PASSIVE_CLOSE:
893 case DCCP_PASSIVE_CLOSEREQ:
894 dccp_finish_passive_close(sk);
897 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
898 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
901 dccp_send_close(sk, 1);
903 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
904 !dccp_sk(sk)->dccps_server_timewait)
905 next_state = DCCP_ACTIVE_CLOSEREQ;
907 next_state = DCCP_CLOSING;
910 dccp_set_state(sk, next_state);
/*
 * dccp_close - close() handler for DCCP sockets.
 *
 * Marks both directions shut down. A listening socket is moved to CLOSED
 * and its listen state stopped before jumping to adjudge_to_death.
 * Otherwise the transmit timer is stopped and the receive queue is drained
 * while counting unread bytes:
 *   - unread data: warn, send a Reset (code Aborted) and move to CLOSED;
 *   - SOCK_LINGER with a zero linger time: call the protocol's disconnect();
 *   - any other non-CLOSED state: dccp_terminate_connection().
 * After sk_stream_wait_close() the state is sampled, the orphan count is
 * incremented and, if the socket is in CLOSED, it is destroyed with
 * inet_csk_destroy_sock().
 * NOTE(review): locking, the freeing of dequeued skbs, the labels and the
 * early-exit taken when the socket was already destroyed are not visible in
 * this extract.
 */
914 void dccp_close(struct sock *sk, long timeout)
916 struct dccp_sock *dp = dccp_sk(sk);
918 u32 data_was_unread = 0;
923 sk->sk_shutdown = SHUTDOWN_MASK;
925 if (sk->sk_state == DCCP_LISTEN) {
926 dccp_set_state(sk, DCCP_CLOSED);
929 inet_csk_listen_stop(sk);
931 goto adjudge_to_death;
934 sk_stop_timer(sk, &dp->dccps_xmit_timer);
937 * We need to flush the recv. buffs. We do this only on the
938 * descriptor close, not protocol-sourced closes, because the
939 *reader process may not have drained the data yet!
941 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
942 data_was_unread += skb->len;
946 if (data_was_unread) {
947 /* Unread data was tossed, send an appropriate Reset Code */
948 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
949 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
950 dccp_set_state(sk, DCCP_CLOSED);
951 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
952 /* Check zero linger _after_ checking for unread data. */
953 sk->sk_prot->disconnect(sk, 0);
954 } else if (sk->sk_state != DCCP_CLOSED) {
955 dccp_terminate_connection(sk);
958 sk_stream_wait_close(sk, timeout);
961 state = sk->sk_state;
964 atomic_inc(sk->sk_prot->orphan_count);
967 * It is the last release_sock in its life. It will remove backlog.
971 * Now socket is owned by kernel and we acquire BH lock
972 * to finish close. No need to check for user refs.
976 WARN_ON(sock_owned_by_user(sk));
978 /* Have we already been destroyed by a softirq or backlog? */
979 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
982 if (sk->sk_state == DCCP_CLOSED)
983 inet_csk_destroy_sock(sk);
985 /* Otherwise, socket is reprieved until protocol close. */
993 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - shutdown() handler; the only visible action is a debug
 * message that logs @how.
 */
995 void dccp_shutdown(struct sock *sk, int how)
997 dccp_pr_debug("called shutdown(%x)\n", how);
1000 EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * dccp_mib_init - initialise the dccp_statistics MIB via snmp_mib_init(),
 * sized for struct dccp_mib, and return its result.
 */
1002 static inline int dccp_mib_init(void)
1004 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
/* dccp_mib_exit - release the dccp_statistics MIB via snmp_mib_free(). */
1007 static inline void dccp_mib_exit(void)
1009 snmp_mib_free((void**)dccp_statistics);
/*
 * Module parameters: thash_entries (read-only, mode 0444) is the number of
 * ehash buckets used by dccp_init() when sizing the established hash; with
 * CONFIG_IP_DCCP_DEBUG, dccp_debug (mode 0644) enables debug messages and is
 * exported GPL-only.
 */
1012 static int thash_entries;
1013 module_param(thash_entries, int, 0444);
1014 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1016 #ifdef CONFIG_IP_DCCP_DEBUG
1018 module_param(dccp_debug, bool, 0644);
1019 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1021 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module initialisation.
 *
 * Visible steps, in order:
 *   1. compile-time check that struct dccp_skb_cb fits in sk_buff's cb[];
 *   2. create the "dccp_bind_bucket" slab cache;
 *   3. derive a page goal from num_physpages (or from thash_entries) and
 *      allocate the established hash with __get_free_pages(), retrying at
 *      smaller orders on failure and reducing ehash_size until it is a
 *      power of two;
 *   4. initialise the ehash chains and allocate the ehash locks;
 *   5. allocate the bind hash starting from the ehash order, again retrying
 *      at smaller orders, and initialise its locks and chains;
 *   6. initialise the MIB, the ack-vector code, the sysctls and
 *      timestamping.
 * Failures unwind through the out_* labels, freeing in reverse order of
 * allocation and resetting each pointer to NULL.
 * NOTE(review): the ehash retry loop stops at "--ehash_order > 0" while the
 * bhash loop uses "--bhash_order >= 0"; whether the asymmetry is intended
 * cannot be determined from this extract. Several lines (return codes, some
 * labels, loop bodies) are also not visible.
 */
1024 static int __init dccp_init(void)
1027 int ehash_order, bhash_order, i;
1030 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1031 FIELD_SIZEOF(struct sk_buff, cb));
1033 dccp_hashinfo.bind_bucket_cachep =
1034 kmem_cache_create("dccp_bind_bucket",
1035 sizeof(struct inet_bind_bucket), 0,
1036 SLAB_HWCACHE_ALIGN, NULL);
1037 if (!dccp_hashinfo.bind_bucket_cachep)
1041 * Size and allocate the main established and bind bucket
1044 * The methodology is similar to that of the buffer cache.
1046 if (num_physpages >= (128 * 1024))
1047 goal = num_physpages >> (21 - PAGE_SHIFT);
1049 goal = num_physpages >> (23 - PAGE_SHIFT);
1052 goal = (thash_entries *
1053 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1054 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1057 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1058 sizeof(struct inet_ehash_bucket);
1059 while (dccp_hashinfo.ehash_size &
1060 (dccp_hashinfo.ehash_size - 1))
1061 dccp_hashinfo.ehash_size--;
1062 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1063 __get_free_pages(GFP_ATOMIC, ehash_order);
1064 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1066 if (!dccp_hashinfo.ehash) {
1067 DCCP_CRIT("Failed to allocate DCCP established hash table");
1068 goto out_free_bind_bucket_cachep;
1071 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1072 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1073 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1076 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1077 goto out_free_dccp_ehash;
1079 bhash_order = ehash_order;
1082 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1083 sizeof(struct inet_bind_hashbucket);
1084 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1087 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1088 __get_free_pages(GFP_ATOMIC, bhash_order);
1089 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1091 if (!dccp_hashinfo.bhash) {
1092 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1093 goto out_free_dccp_locks;
1096 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1097 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1098 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1101 rc = dccp_mib_init();
1103 goto out_free_dccp_bhash;
1105 rc = dccp_ackvec_init();
1107 goto out_free_dccp_mib;
1109 rc = dccp_sysctl_init();
1111 goto out_ackvec_exit;
1113 dccp_timestamping_init();
1120 out_free_dccp_bhash:
1121 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1122 dccp_hashinfo.bhash = NULL;
1123 out_free_dccp_locks:
1124 inet_ehash_locks_free(&dccp_hashinfo);
1125 out_free_dccp_ehash:
1126 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1127 dccp_hashinfo.ehash = NULL;
1128 out_free_bind_bucket_cachep:
1129 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1130 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini - module teardown.
 *
 * Frees the bind and established hash tables (recomputing each allocation
 * order from the table size with get_order()), then the ehash locks and the
 * bind-bucket slab cache.
 * NOTE(review): any calls before or after these lines are not visible in
 * this extract.
 */
1134 static void __exit dccp_fini(void)
1137 free_pages((unsigned long)dccp_hashinfo.bhash,
1138 get_order(dccp_hashinfo.bhash_size *
1139 sizeof(struct inet_bind_hashbucket)));
1140 free_pages((unsigned long)dccp_hashinfo.ehash,
1141 get_order(dccp_hashinfo.ehash_size *
1142 sizeof(struct inet_ehash_bucket)));
1143 inet_ehash_locks_free(&dccp_hashinfo);
1144 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/* Module entry/exit registration and module metadata. */
1149 module_init(dccp_init);
1150 module_exit(dccp_fini);
1152 MODULE_LICENSE("GPL");
1153 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1154 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");