[DCCP]: Support for server holding timewait state
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* Per-CPU DCCP MIB counters (see DCCP_INC_STATS et al.). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Number of DCCP sockets that have been close()d but still hold state. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Shared hash tables for established/bound/listening DCCP sockets,
 * mirroring the TCP inet_hashinfo layout. Only the listen-hash fields
 * need static initialization here.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
58
/*
 * dccp_set_state  -  Move socket @sk into @state
 *
 * Keeps the CURRESTAB/ESTABRESETS MIB counters in sync with the state
 * transition and, when entering DCCP_CLOSED, unhashes the socket and
 * releases its local port binding (unless the port is user-locked).
 * Caller must hold the socket lock; transitioning into the current state
 * triggers a WARN_ON.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		/* Count resets of connections that got past handshake. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
/*
 * Complete the passive side of connection teardown, i.e. react to a
 * Close/CloseReq received from the peer (RFC 4340, sec. 8.3).
 * No-op if the socket is in neither passive-close state.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
113
/*
 * dccp_done  -  Terminal cleanup for a connection
 *
 * Moves the socket to DCCP_CLOSED, stops all transmit timers and marks
 * both directions as shut down. If user space still holds a reference
 * (socket not SOCK_DEAD) it is woken via sk_state_change(); otherwise
 * the socket is destroyed immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
126
127 EXPORT_SYMBOL_GPL(dccp_done);
128
129 const char *dccp_packet_name(const int type)
130 {
131         static const char *dccp_packet_names[] = {
132                 [DCCP_PKT_REQUEST]  = "REQUEST",
133                 [DCCP_PKT_RESPONSE] = "RESPONSE",
134                 [DCCP_PKT_DATA]     = "DATA",
135                 [DCCP_PKT_ACK]      = "ACK",
136                 [DCCP_PKT_DATAACK]  = "DATAACK",
137                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138                 [DCCP_PKT_CLOSE]    = "CLOSE",
139                 [DCCP_PKT_RESET]    = "RESET",
140                 [DCCP_PKT_SYNC]     = "SYNC",
141                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
142         };
143
144         if (type >= DCCP_NR_PKT_TYPES)
145                 return "INVALID";
146         else
147                 return dccp_packet_names[type];
148 }
149
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152 const char *dccp_state_name(const int state)
153 {
154         static char *dccp_state_names[] = {
155         [DCCP_OPEN]             = "OPEN",
156         [DCCP_REQUESTING]       = "REQUESTING",
157         [DCCP_PARTOPEN]         = "PARTOPEN",
158         [DCCP_LISTEN]           = "LISTEN",
159         [DCCP_RESPOND]          = "RESPOND",
160         [DCCP_CLOSING]          = "CLOSING",
161         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
162         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
163         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164         [DCCP_TIME_WAIT]        = "TIME_WAIT",
165         [DCCP_CLOSED]           = "CLOSED",
166         };
167
168         if (state >= DCCP_MAX_STATES)
169                 return "INVALID STATE!";
170         else
171                 return dccp_state_names[state];
172 }
173
174 EXPORT_SYMBOL_GPL(dccp_state_name);
175
/* Insert @sk into the DCCP established/listening hash tables. */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
180
181 EXPORT_SYMBOL_GPL(dccp_hash);
182
/* Remove @sk from the DCCP hash tables (inverse of dccp_hash). */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
188 EXPORT_SYMBOL_GPL(dccp_unhash);
189
/*
 * dccp_init_sock  -  Initialize per-socket DCCP state
 * @sk: socket being set up
 * @ctl_sock_initialized: false only for the early control socket, which
 *	skips feature negotiation, ack-vector and CCID allocation
 *
 * Returns 0 on success or a negative errno (-ENOMEM on allocation
 * failure, or whatever dccp_feat_init() reports). On CCID allocation
 * failure all partially-allocated objects are torn down again, so the
 * socket is left in a consistent (empty) state.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	/* 536 is the conservative initial MSS, refined later by PMTU. */
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Unwind: delete whichever CCID did get created,
			 * free the ack vector, and NULL all pointers so
			 * dccp_destroy_sock() won't free them again. */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}
252
253 EXPORT_SYMBOL_GPL(dccp_init_sock);
254
/*
 * dccp_destroy_sock  -  Release all DCCP-private resources of @sk
 *
 * Frees the pending retransmit skb, the bound port, the service list,
 * the ack vector and both CCID halves, then cleans up feature
 * negotiation state. Counterpart of dccp_init_sock(). Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	/* NULL the CCID pointers after deletion to guard against reuse. */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}
289
290 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291
292 static inline int dccp_listen_start(struct sock *sk, int backlog)
293 {
294         struct dccp_sock *dp = dccp_sk(sk);
295
296         dp->dccps_role = DCCP_ROLE_LISTEN;
297         return inet_csk_listen_start(sk, backlog);
298 }
299
300 static inline int dccp_need_reset(int state)
301 {
302         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303                state != DCCP_REQUESTING;
304 }
305
/*
 * dccp_disconnect  -  Abort the connection on @sk (ABORT of RFC 793, 3.8)
 *
 * Sends a Reset with Code 2 ("Aborted") when the connection state
 * requires one, then flushes queues, stops timers and returns the
 * socket to a clean unconnected state so it may be reused. Caller must
 * hold the socket lock. Always returns 0 at present.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound socket must still own its bind-hash bucket. */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
352
353 EXPORT_SYMBOL_GPL(dccp_disconnect);
354
355 /*
356  *      Wait for a DCCP event.
357  *
358  *      Note that we don't need to lock the socket, as the upper poll layers
359  *      take care of normal races (between the test and the event) and we don't
360  *      go look at any of the socket buffers directly.
361  */
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. not still in the handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
410
411 EXPORT_SYMBOL_GPL(dccp_poll);
412
/*
 * dccp_ioctl  -  DCCP-level ioctl handler
 *
 * Currently only SIOCINQ is supported: it reports the length of the
 * packet at the head of the receive queue (datagram semantics — only
 * that much can be read in one go). Listening sockets get -ENOTCONN,
 * unknown commands -ENOIOCTLCMD.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}
446
447 EXPORT_SYMBOL_GPL(dccp_ioctl);
448
/*
 * Set the service code(s) for @sk. The first __be32 of @optval is the
 * primary service code (already fetched into @service by the caller);
 * any remaining __be32 values form an optional service list, which is
 * copied and swapped in under the socket lock.
 *
 * Returns -EINVAL for an invalid code or oversized list, -ENOMEM on
 * allocation failure, -EFAULT on copy failure or if the list contains
 * the invalid service value.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* Entries beyond the primary service code. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace (and free) any previously installed list. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
483
484 /* byte 1 is feature.  the rest is the preference list */
485 static int dccp_setsockopt_change(struct sock *sk, int type,
486                                   struct dccp_so_feat __user *optval)
487 {
488         struct dccp_so_feat opt;
489         u8 *val;
490         int rc;
491
492         if (copy_from_user(&opt, optval, sizeof(opt)))
493                 return -EFAULT;
494
495         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496         if (!val)
497                 return -ENOMEM;
498
499         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500                 rc = -EFAULT;
501                 goto out_free_val;
502         }
503
504         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505                               val, opt.dccpsf_len, GFP_KERNEL);
506         if (rc)
507                 goto out_free_val;
508
509 out:
510         return rc;
511
512 out_free_val:
513         kfree(val);
514         goto out;
515 }
516
/*
 * do_dccp_setsockopt  -  Handle SOL_DCCP-level setsockopt options
 *
 * Dispatches on @optname after fetching the leading int from user
 * space. DCCP_SOCKOPT_SERVICE is handled without the socket lock (it
 * takes the lock itself); all other options run under lock_sock().
 * Unknown options yield -ENOPROTOOPT.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* Only the server may elect to hold TIMEWAIT state. */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
583
/*
 * Entry point for setsockopt(): SOL_DCCP options are handled locally,
 * everything else is forwarded to the address-family layer (IP/IPv6).
 */
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}
593
594 EXPORT_SYMBOL_GPL(dccp_setsockopt);
595
596 #ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_setsockopt(); same dispatch logic. */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}
605
606 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
607 #endif
608
609 static int dccp_getsockopt_service(struct sock *sk, int len,
610                                    __be32 __user *optval,
611                                    int __user *optlen)
612 {
613         const struct dccp_sock *dp = dccp_sk(sk);
614         const struct dccp_service_list *sl;
615         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
616
617         lock_sock(sk);
618         if ((sl = dp->dccps_service_list) != NULL) {
619                 slen = sl->dccpsl_nr * sizeof(u32);
620                 total_len += slen;
621         }
622
623         err = -EINVAL;
624         if (total_len > len)
625                 goto out;
626
627         err = 0;
628         if (put_user(total_len, optlen) ||
629             put_user(dp->dccps_service, optval) ||
630             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
631                 err = -EFAULT;
632 out:
633         release_sock(sk);
634         return err;
635 }
636
/*
 * do_dccp_getsockopt  -  Handle SOL_DCCP-level getsockopt options
 *
 * Ranges 128-191 and 192-255 are reserved for the RX and TX CCIDs
 * respectively and are delegated to the congestion-control modules.
 * Note: DCCP_SOCKOPT_PACKET_SIZE is deprecated and returns 0 without
 * writing anything to @optval.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
689
/*
 * Entry point for getsockopt(): SOL_DCCP options are handled locally,
 * everything else is forwarded to the address-family layer (IP/IPv6).
 */
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}
699
700 EXPORT_SYMBOL_GPL(dccp_getsockopt);
701
702 #ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_getsockopt(); same dispatch logic. */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}
711
712 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
713 #endif
714
/*
 * dccp_sendmsg  -  Queue one datagram of at most the current MSS
 *
 * Messages larger than the cached MSS are rejected with -EMSGSIZE
 * (DCCP does no segmentation). The payload is queued as a single skb
 * and pushed via dccp_write_xmit(). Returns the number of bytes queued,
 * or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* Enforce the tx queue length cap (0 means unlimited). */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock while (possibly) sleeping in the allocator. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
768
769 EXPORT_SYMBOL_GPL(dccp_sendmsg);
770
/*
 * dccp_recvmsg  -  Receive one datagram from @sk
 *
 * Datagram semantics: at most one packet's payload is returned per
 * call; excess bytes are discarded and MSG_TRUNC is set. Close/
 * CloseReq/Reset packets at the queue head terminate reception with a
 * return value of 0 (and, unless MSG_PEEK, complete the passive close).
 * Blocks according to @nonblock/SO_RCVTIMEO. Returns bytes copied or a
 * negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet types are simply consumed. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Releases the socket lock while sleeping for data. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
874
875 EXPORT_SYMBOL_GPL(dccp_recvmsg);
876
/*
 * inet_dccp_listen  -  listen(2) handler for DCCP sockets
 *
 * Moves an unconnected SOCK_DCCP socket into the listening state, or
 * merely adjusts the backlog if it is already listening. Returns
 * -EINVAL for wrong socket type/state, otherwise the result of
 * dccp_listen_start().
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
912
913 EXPORT_SYMBOL_GPL(inet_dccp_listen);
914
/*
 * Begin active teardown of an established (or half-open) connection.
 * Chooses the next socket state based on the current one; any state not
 * explicitly handled falls straight through to DCCP_CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                /* Peer already initiated the close; finish our side of it. */
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                /*
                 * A server not configured to hold timewait state itself
                 * (dccps_server_timewait unset) goes to ACTIVE_CLOSEREQ,
                 * pushing the timewait burden to the client; otherwise we
                 * enter CLOSING like a client would.
                 */
                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}
941
/*
 * Process-context close of a DCCP socket (the sk->sk_prot->close handler).
 * @timeout: linger time to wait for the close handshake to complete.
 *
 * Flushes unread receive data (aborting the connection if any was pending),
 * starts connection teardown, then orphans the socket and hands the final
 * destruction off to the protocol/timer paths unless it is already CLOSED.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        /* Wait (up to @timeout) for queued data to drain / close to finish. */
        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /* Snapshot the state before we drop the socket lock below. */
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}
1020
1021 EXPORT_SYMBOL_GPL(dccp_close);
1022
/*
 * shutdown() handler for DCCP sockets.  Currently only logs the request;
 * no half-close processing is performed here (full teardown happens in
 * dccp_close()).
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1027
1028 EXPORT_SYMBOL_GPL(dccp_shutdown);
1029
1030 static int __init dccp_mib_init(void)
1031 {
1032         int rc = -ENOMEM;
1033
1034         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1035         if (dccp_statistics[0] == NULL)
1036                 goto out;
1037
1038         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1039         if (dccp_statistics[1] == NULL)
1040                 goto out_free_one;
1041
1042         rc = 0;
1043 out:
1044         return rc;
1045 out_free_one:
1046         free_percpu(dccp_statistics[0]);
1047         dccp_statistics[0] = NULL;
1048         goto out;
1049
1050 }
1051
1052 static void dccp_mib_exit(void)
1053 {
1054         free_percpu(dccp_statistics[0]);
1055         free_percpu(dccp_statistics[1]);
1056         dccp_statistics[0] = dccp_statistics[1] = NULL;
1057 }
1058
/* Module parameter: requested size of the established-connections hash
 * (0 = size it automatically from available memory in dccp_init()). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Debug-message toggle for dccp_pr_debug(); 0444 = read-only via sysfs. */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1070
/*
 * Module initialisation: allocate the bind-bucket slab, size and allocate
 * the established (ehash) and bind (bhash) hash tables, then bring up the
 * MIB counters, ack-vector support and sysctls.  On any failure every
 * resource acquired so far is released via the goto ladder at the bottom.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        /* An explicit thash_entries module parameter overrides the memory
         * based sizing above. */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        /* Smallest page order whose size reaches the goal. */
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /* Try progressively smaller orders until an allocation succeeds;
         * ehash_size is rounded down to a power of two each attempt. */
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        /* Each bucket has a normal chain and a timewait chain. */
        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        /* Same shrink-until-it-fits strategy for the bind hash; the
         * `continue` re-evaluates the loop condition, decrementing
         * bhash_order, so oversized (>64KB) tables are retried smaller. */
        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
/* Error unwind: release resources in reverse order of acquisition. */
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}
1177
/*
 * Module teardown: release everything dccp_init() acquired.  The hash
 * table page orders are recomputed from the recorded table sizes rather
 * than stored at init time.
 */
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
1192
/* Register module entry/exit points and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");