108d56bd25c52584191c7b3c40db34a25769cf65
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/**
 * dccp_set_state  -  move @sk to @state, keeping SNMP counters in sync
 * @sk:    socket whose state changes (caller must hold the socket lock)
 * @state: new DCCP state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * On entry into DCCP_CLOSED the socket is also unhashed and, unless the
 * user pinned the port with SOCK_BINDPORT_LOCK, its local port is released.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* A transition to the state we are already in indicates a bug. */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		/* Teardown of an established/closing connection counts as
		 * a reset for the ESTABRESETS MIB counter. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
/**
 * dccp_init_sock  -  initialise the DCCP-specific part of a new socket
 * @sk: socket being initialised
 * @ctl_sock_initialized: non-zero once the DCCP control socket exists;
 *	the control socket itself takes the else-branch and skips feature
 *	negotiation and CCID/ack-vector allocation
 *
 * Returns 0 on success, the error from dccp_feat_init(), or -ENOMEM if
 * the ack vector or either CCID cannot be allocated.  On CCID failure
 * every object allocated so far is released again before returning.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	/* 536 is the conservative initial MSS; refreshed via icsk_sync_mss */
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		/* Ack vector only exists when the feature is enabled */
		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Roll back: delete whichever CCID did get created
			 * and free the ack vector allocated above. */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}
238
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
240
/**
 * dccp_destroy_sock  -  release all DCCP-private resources of @sk
 * @sk: socket being destroyed
 *
 * Frees the retransmit skb, the bind-bucket reference, the service
 * list, the ack vector (if allocated), both CCID blocks and the
 * feature-negotiation list.  Pointers are NULLed after freeing so a
 * second invocation is harmless.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	/* Ack vector is only allocated when the feature was negotiated */
	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}
273
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
/**
 * dccp_disconnect  -  abort the connection (ABORT function, RFC 793 3.8)
 * @sk:    socket to disconnect (caller holds the socket lock)
 * @flags: unused here; part of the proto->disconnect signature
 *
 * Sends a Reset with Code 2 ("Aborted") when the old state requires it,
 * purges all queues and timers and returns the socket to a clean,
 * unconnected state.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	/* Drop anything still queued for receive or transmit. */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound socket must still own its bind bucket at this point. */
	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
341
342 EXPORT_SYMBOL_GPL(dccp_disconnect);
343
344 /*
345  *      Wait for a DCCP event.
346  *
347  *      Note that we don't need to lock the socket, as the upper poll layers
348  *      take care of normal races (between the test and the event) and we don't
349  *      go look at any of the socket buffers directly.
350  */
/**
 * dccp_poll  -  poll/select/epoll readiness callback for DCCP sockets
 * @file: file the poll is performed on
 * @sock: socket being polled
 * @wait: poll table to register the wait queue with
 *
 * Returns the POLL* event mask currently applicable to @sock.  Runs
 * without the socket lock; see the comment below for why that is safe.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (any state except REQUESTING/RESPOND) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
399
400 EXPORT_SYMBOL_GPL(dccp_poll);
401
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
403 {
404         int rc = -ENOTCONN;
405
406         lock_sock(sk);
407
408         if (sk->sk_state == DCCP_LISTEN)
409                 goto out;
410
411         switch (cmd) {
412         case SIOCINQ: {
413                 struct sk_buff *skb;
414                 unsigned long amount = 0;
415
416                 skb = skb_peek(&sk->sk_receive_queue);
417                 if (skb != NULL) {
418                         /*
419                          * We will only return the amount of this packet since
420                          * that is all that will be read.
421                          */
422                         amount = skb->len;
423                 }
424                 rc = put_user(amount, (int __user *)arg);
425         }
426                 break;
427         default:
428                 rc = -ENOIOCTLCMD;
429                 break;
430         }
431 out:
432         release_sock(sk);
433         return rc;
434 }
435
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
437
/**
 * dccp_setsockopt_service  -  handle setsockopt(DCCP_SOCKOPT_SERVICE)
 * @sk:      socket to set the service code(s) on
 * @service: primary service code (first __be32 of @optval)
 * @optval:  user buffer; may contain additional service codes after the
 *	     primary one
 * @optlen:  total length of @optval in bytes
 *
 * Returns 0 on success, -EINVAL for an invalid code or oversized list,
 * -ENOMEM on allocation failure, -EFAULT if the user copy fails or the
 * copied list contains DCCP_SERVICE_INVALID_VALUE.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	/* More than one code supplied: copy the extra ones into a list. */
	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* First u32 of the buffer is @service, not a list entry. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	/* Swap in the new list under the socket lock. */
	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
472
/**
 * do_dccp_setsockopt  -  SOL_DCCP setsockopt worker
 * @sk:      socket being configured
 * @level:   always SOL_DCCP (checked by the callers)
 * @optname: DCCP_SOCKOPT_* option id
 * @optval:  user buffer with the option value
 * @optlen:  length of @optval
 *
 * Returns 0 or a negative errno.  DCCP_SOCKOPT_SERVICE is dispatched
 * before taking the socket lock because its handler locks on its own.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* Only the server side may hold the TIMEWAIT state */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		/* Checksum coverage is a 4-bit field: 0..15 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
528
529 int dccp_setsockopt(struct sock *sk, int level, int optname,
530                     char __user *optval, int optlen)
531 {
532         if (level != SOL_DCCP)
533                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
534                                                              optname, optval,
535                                                              optlen);
536         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
537 }
538
539 EXPORT_SYMBOL_GPL(dccp_setsockopt);
540
541 #ifdef CONFIG_COMPAT
542 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
543                            char __user *optval, int optlen)
544 {
545         if (level != SOL_DCCP)
546                 return inet_csk_compat_setsockopt(sk, level, optname,
547                                                   optval, optlen);
548         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
549 }
550
551 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
552 #endif
553
/**
 * dccp_getsockopt_service  -  handle getsockopt(DCCP_SOCKOPT_SERVICE)
 * @sk:     socket to query
 * @len:    size of the user buffer
 * @optval: user buffer; receives the primary code followed by the list
 * @optlen: user pointer; receives the total number of bytes written
 *
 * Returns 0 on success, -EINVAL if the buffer is too small, -EFAULT if
 * any user copy fails.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	/* total_len = primary code + any additional list entries */
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	/* optval + 1 skips the primary service code written just above */
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
581
/**
 * do_dccp_getsockopt  -  SOL_DCCP getsockopt worker
 * @sk:      socket to query
 * @level:   always SOL_DCCP (checked by the callers)
 * @optname: DCCP_SOCKOPT_* option id; 128-191 are forwarded to the RX
 *	     CCID and 192-255 to the TX CCID
 * @optval:  user buffer for the result
 * @optlen:  in: buffer size, out: bytes written
 *
 * Returns 0 or a negative errno.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		/* receiver-side CCID option range */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* sender-side CCID option range */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Common exit for the simple integer-valued options above. */
	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
633
634 int dccp_getsockopt(struct sock *sk, int level, int optname,
635                     char __user *optval, int __user *optlen)
636 {
637         if (level != SOL_DCCP)
638                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
639                                                              optname, optval,
640                                                              optlen);
641         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
642 }
643
644 EXPORT_SYMBOL_GPL(dccp_getsockopt);
645
646 #ifdef CONFIG_COMPAT
647 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
648                            char __user *optval, int __user *optlen)
649 {
650         if (level != SOL_DCCP)
651                 return inet_csk_compat_getsockopt(sk, level, optname,
652                                                   optval, optlen);
653         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
654 }
655
656 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
657 #endif
658
/**
 * dccp_sendmsg  -  queue one datagram for transmission
 * @iocb:  kiocb of the call (unused here)
 * @sk:    socket to send on
 * @msg:   message to send; must fit into one packet (<= MSS)
 * @len:   payload length
 *
 * Returns @len on success or a negative errno: -EMSGSIZE if the
 * payload exceeds the cached MSS, -EAGAIN when the tx queue limit
 * (sysctl_dccp_tx_qlen) is reached, or errors from waiting/allocation.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* Enforce the tx queue length limit (0 means unlimited). */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * tests sk_write_pending) works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock around the (possibly sleeping) allocation.
	 * NOTE(review): socket state may change while unlocked — the code
	 * does not re-check sk_state after re-acquiring the lock. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	/* rc == 0 means success: report the full length as written. */
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
712
713 EXPORT_SYMBOL_GPL(dccp_sendmsg);
714
/**
 * dccp_recvmsg  -  receive one datagram from the socket
 * @sk:       socket to receive on (must not be listening)
 * @msg:      destination for the payload
 * @len:      capacity of @msg; excess packet data sets MSG_TRUNC
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_* flags; MSG_PEEK leaves the packet on the queue
 * @addr_len: unused here
 *
 * Returns the number of bytes copied, 0 on connection end, or a
 * negative errno.  Blocks (up to the receive timeout) until a data
 * packet or a connection-terminating packet arrives.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Answer the peer's close unless we are only peeking */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet: discard it and keep looking */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* Copy at most one packet; flag truncation if it is larger. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
820
821 int inet_dccp_listen(struct socket *sock, int backlog)
822 {
823         struct sock *sk = sock->sk;
824         unsigned char old_state;
825         int err;
826
827         lock_sock(sk);
828
829         err = -EINVAL;
830         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
831                 goto out;
832
833         old_state = sk->sk_state;
834         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
835                 goto out;
836
837         /* Really, if the socket is already in listen state
838          * we can only allow the backlog to be adjusted.
839          */
840         if (old_state != DCCP_LISTEN) {
841                 /*
842                  * FIXME: here it probably should be sk->sk_prot->listen_start
843                  * see tcp_listen_start
844                  */
845                 err = dccp_listen_start(sk, backlog);
846                 if (err)
847                         goto out;
848         }
849         sk->sk_max_ack_backlog = backlog;
850         err = 0;
851
852 out:
853         release_sock(sk);
854         return err;
855 }
856
857 EXPORT_SYMBOL_GPL(inet_dccp_listen);
858
/**
 * dccp_terminate_connection  -  begin active connection teardown
 * @sk: socket being closed
 *
 * Sends a Close where required and moves @sk to the appropriate
 * closing state: a server not using TIMEWAIT goes to ACTIVE_CLOSEREQ,
 * everything else to CLOSING; states not handled explicitly go
 * straight to CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer already initiated the close: just answer it. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* active = 1: dccp_send_close() will retransmit the Close */
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
885
/**
 * dccp_close  -  close a DCCP socket (user-initiated)
 * @sk:      socket being closed
 * @timeout: linger time in jiffies given to the closing handshake
 *
 * Counterpart of tcp_close(): shuts down both directions, discards any
 * unread receive data (aborting with a Reset in that case), runs the
 * connection-termination handshake where required, then orphans the
 * socket. The socket is destroyed immediately once it has reached
 * DCCP_CLOSED; otherwise destruction is deferred to the protocol state
 * machine ("reprieved until protocol close").
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case: a listener has no connection to terminate. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                /* Normal case: start the active/passive close handshake. */
                dccp_terminate_connection(sk);
        }

        /* Linger up to @timeout while the close handshake proceeds. */
        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /* Snapshot the state before orphaning for the softirq race check. */
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
966
/**
 * dccp_shutdown  -  partially close down a DCCP socket
 * @sk:  socket to shut down
 * @how: SHUT_RD / SHUT_WR / SHUT_RDWR direction flags
 *
 * NOTE(review): only logs the request — shutdown is effectively a no-op
 * here; confirm whether half-close support is intentionally unimplemented.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
973
/* Allocate the SNMP counter storage backing dccp_statistics. */
static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
978
/* Release the SNMP counter storage allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void**)dccp_statistics);
}
983
/* Established-hash bucket count override; 0 = size from available memory
 * (see dccp_init()). Read-only after module load (perm 0444). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for dccp_pr_debug() output; 0644 allows toggling via
 * the module parameter interface at runtime. */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
995
/*
 * dccp_init  -  module initialisation
 *
 * Allocates the bind-bucket slab, the established (ehash) and bind
 * (bhash) hash tables, the SNMP counters, ack-vector and sysctl
 * machinery. On any failure, everything allocated so far is unwound in
 * reverse order via the goto labels at the bottom.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        /* struct dccp_skb_cb is stored in skb->cb[] and must fit there */
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        /* the thash_entries module parameter overrides the memory-based goal */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /*
         * Allocate the ehash as one power-of-two block of pages, retrying
         * at half the order until an allocation succeeds; ehash_size is
         * rounded down to a power of two.
         */
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        /* start the bind hash at the same order as the established hash */
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                /* cap the bind hash: shrink while it exceeds 64K buckets */
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
        /* error unwinding: undo the allocations in reverse order */
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}
1105
/*
 * dccp_fini  -  module unload: tear down in reverse order of dccp_init().
 *
 * The page orders for the hash tables were local to dccp_init(), so they
 * are recomputed here from the stored table sizes via get_order().
 */
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
1120
1121 module_init(dccp_init);
1122 module_exit(dccp_fini);
1123
1124 MODULE_LICENSE("GPL");
1125 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1126 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");