/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* The maximum queue length for tx in packets. 0 is no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;
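
/*
 * dccp_set_state  -  Move a socket to a new DCCP state
 * Updates the CURRESTAB/ESTABRESETS MIB counters and unhashes the socket
 * before it is marked CLOSED, so that a closed socket never lingers in the
 * lookup tables.
 */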
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
                break;
        }
}
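
/*
 * dccp_done  -  Final transition to DCCP_CLOSED
 * Stops all transmit timers, signals the state change to any process
 * sleeping on the socket and, if the socket is already orphaned, destroys
 * it immediately.
 */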
void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
                [DCCP_OPEN]             = "OPEN",
                [DCCP_REQUESTING]       = "REQUESTING",
                [DCCP_PARTOPEN]         = "PARTOPEN",
                [DCCP_LISTEN]           = "LISTEN",
                [DCCP_RESPOND]          = "RESPOND",
                [DCCP_CLOSING]          = "CLOSING",
                [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
                [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
                [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
                [DCCP_TIME_WAIT]        = "TIME_WAIT",
                [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);
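
/*
 * dccp_init_sock  -  Initialise per-connection DCCP state
 * Sets conservative defaults (536-byte MSS cache, undefined role, absent
 * service code) and starts feature negotiation; the control socket does
 * not negotiate features.
 */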
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        INIT_LIST_HEAD(&dp->dccps_featneg);
        /* control socket doesn't need feature negotiation */
        if (likely(ctl_sock_initialized))
                return dccp_feat_init(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
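
/*
 * dccp_destroy_sock  -  Release all resources attached to a DCCP socket
 * Frees the pending retransmit skb, the bind bucket, the service list, the
 * ack vector and both CCID instances, and purges feature-negotiation state.
 */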
void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /* do not start to listen if feature negotiation setup fails */
        if (dccp_feat_finalise_settings(dp))
                return -EPROTO;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}
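
/*
 * dccp_disconnect  -  Abort the connection (ABORT function of RFC 793)
 * Sends a Reset with Code 2, "Aborted" when the socket was actively
 * connected, flushes both queues and returns the socket to DCCP_CLOSED.
 */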
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);

        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else { /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
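
/*
 * dccp_ioctl  -  Handle SIOCINQ
 * Unlike TCP, only the length of the packet at the head of the receive
 * queue is reported, since a single read never spans datagram boundaries.
 */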
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL)
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                rc = put_user(amount, (int __user *)arg);
                break;
        }
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
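
/*
 * dccp_setsockopt_service  -  Set the 32-bit Service Code(s) of a socket
 * The first value becomes dccps_service; any further values are kept in a
 * separately allocated dccp_service_list, used to match Service Codes of
 * incoming connection requests.
 */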
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
        u8 *list, len;
        int i, rc;

        if (cscov < 0 || cscov > 15)
                return -EINVAL;
        /*
         * Populate a list of permissible values, in the range cscov...15. This
         * is necessary since feature negotiation of single values only works if
         * both sides incidentally choose the same value. Since the list starts
         * lowest-value first, negotiation will pick the smallest shared value.
         */
        if (cscov == 0)
                return 0;
        len = 16 - cscov;

        list = kmalloc(len, GFP_KERNEL);
        if (list == NULL)
                return -ENOBUFS;

        for (i = 0; i < len; i++)
                list[i] = cscov + i;

        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

        if (rc == 0) {
                if (rx)
                        dccp_sk(sk)->dccps_pcrlen = cscov;
                else
                        dccp_sk(sk)->dccps_pcslen = cscov;
        }
        kfree(list);
        return rc;
}
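
/*
 * dccp_setsockopt_ccid  -  Register a CCID preference list
 * The byte array supplied by the user is passed verbatim to feature
 * negotiation, for the TX direction, the RX direction, or both.
 */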
static int dccp_setsockopt_ccid(struct sock *sk, int type,
                                char __user *optval, int optlen)
{
        u8 *val;
        int rc = 0;

        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
                return -EINVAL;

        val = kmalloc(optlen, GFP_KERNEL);
        if (val == NULL)
                return -ENOMEM;

        if (copy_from_user(val, optval, optlen)) {
                kfree(val);
                return -EFAULT;
        }

        lock_sock(sk);
        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
        release_sock(sk);

        kfree(val);
        return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CHANGE_L:
        case DCCP_SOCKOPT_CHANGE_R:
                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CCID:
        case DCCP_SOCKOPT_RX_CCID:
        case DCCP_SOCKOPT_TX_CCID:
                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
        }

        if (optlen < (int)sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, false);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, true);
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);

        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_TX_CCID:
                val = ccid_get_current_tx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_RX_CCID:
                val = ccid_get_current_rx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
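
/*
 * dccp_sendmsg  -  Queue one datagram for transmission
 * A message may not exceed the current MPS (dccps_mss_cache), and at most
 * sysctl_dccp_tx_qlen packets may be outstanding in the write queue.
 */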
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
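
/*
 * dccp_recvmsg  -  Receive one datagram
 * Data-carrying packets are copied to the user (truncating if the buffer is
 * too small); Close/CloseReq/Reset terminate the loop, while all other
 * packet types are silently discarded.
 */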
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a never-connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
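
/*
 * inet_dccp_listen  -  Move a socket into LISTEN state
 * Valid only for unconnected SOCK_DCCP sockets; if the socket is already
 * listening, only the backlog is adjusted.
 */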
int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
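
/*
 * dccp_terminate_connection  -  Initiate active close
 * Sends Close (or finishes a passive close) and picks the next state:
 * a server not using the SERVER_TIMEWAIT option moves to ACTIVE_CLOSEREQ,
 * otherwise the socket waits in CLOSING.
 */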
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}
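
/*
 * dccp_close  -  Close a socket from user context
 * Flushes unread data (answering with Reset "Aborted" if any was dropped),
 * performs the active-close handshake and then orphans the socket, leaving
 * final destruction to the protocol if the close is still in progress.
 */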
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        percpu_counter_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
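
/*
 * dccp_init  -  Module initialisation
 * Sizes the established and bind hash tables from available memory (or from
 * the thash_entries parameter), then brings up the MIBs, ack-vector code and
 * sysctls; each step unwinds on failure via the out_* labels.
 */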
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc;

        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));
        rc = percpu_counter_init(&dccp_orphan_count, 0);
        if (rc)
                goto out_fail;
        rc = -ENOBUFS;
        inet_hashinfo_init(&dccp_hashinfo);
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_percpu;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();

        return 0;

out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_free_percpu:
        percpu_counter_destroy(&dccp_orphan_count);
out_fail:
        return rc;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
        percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");