[DCCP]: Shift the retransmit timer for active-close into output.c
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* Per-CPU DCCP MIB counters (DCCP_INC_STATS et al. update these). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of DCCP sockets closed by userspace but not yet destroyed. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global hash tables (bind/listen/established) shared by all DCCP sockets. */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
58
/*
 * dccp_set_state - change sk->sk_state, updating MIB counters and hash
 *		    table membership as a side effect.
 * @sk:    socket whose state is changed (caller holds the socket lock)
 * @state: new DCCP state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * Transitioning to DCCP_CLOSED also unhashes the socket and releases its
 * bound port (unless the user locked the binding).
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* A transition to the current state indicates a caller bug. */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		/* Only count resets of connections that actually reached
		 * (or were leaving) the established phase. */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
/*
 * Complete a close that was initiated by the peer: either answer a received
 * Close with a Reset(Closed), or answer a received CloseReq by sending our
 * own Close.  States other than the two handled here are left untouched.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
113
/*
 * dccp_done - final transition to DCCP_CLOSED.
 *
 * Stops all transmit timers, marks both directions shut down and either
 * wakes up a still-attached process or, for an orphaned (SOCK_DEAD)
 * socket, destroys it immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
126
127 EXPORT_SYMBOL_GPL(dccp_done);
128
129 const char *dccp_packet_name(const int type)
130 {
131         static const char *dccp_packet_names[] = {
132                 [DCCP_PKT_REQUEST]  = "REQUEST",
133                 [DCCP_PKT_RESPONSE] = "RESPONSE",
134                 [DCCP_PKT_DATA]     = "DATA",
135                 [DCCP_PKT_ACK]      = "ACK",
136                 [DCCP_PKT_DATAACK]  = "DATAACK",
137                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138                 [DCCP_PKT_CLOSE]    = "CLOSE",
139                 [DCCP_PKT_RESET]    = "RESET",
140                 [DCCP_PKT_SYNC]     = "SYNC",
141                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
142         };
143
144         if (type >= DCCP_NR_PKT_TYPES)
145                 return "INVALID";
146         else
147                 return dccp_packet_names[type];
148 }
149
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152 const char *dccp_state_name(const int state)
153 {
154         static char *dccp_state_names[] = {
155         [DCCP_OPEN]             = "OPEN",
156         [DCCP_REQUESTING]       = "REQUESTING",
157         [DCCP_PARTOPEN]         = "PARTOPEN",
158         [DCCP_LISTEN]           = "LISTEN",
159         [DCCP_RESPOND]          = "RESPOND",
160         [DCCP_CLOSING]          = "CLOSING",
161         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
162         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
163         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164         [DCCP_TIME_WAIT]        = "TIME_WAIT",
165         [DCCP_CLOSED]           = "CLOSED",
166         };
167
168         if (state >= DCCP_MAX_STATES)
169                 return "INVALID STATE!";
170         else
171                 return dccp_state_names[state];
172 }
173
174 EXPORT_SYMBOL_GPL(dccp_state_name);
175
/* Insert @sk into the global DCCP connection hash (sk_prot->hash hook). */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
182
/* Remove @sk from the global DCCP connection hash (sk_prot->unhash hook). */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
189
/*
 * dccp_init_sock - initialise the DCCP-specific parts of a new socket.
 * @sk:			socket to initialise
 * @ctl_sock_initialized: false only for the per-namespace control socket,
 *			  which skips feature negotiation and CCID setup
 *
 * Sets protocol defaults (RTO, MSS cache, role, service code), arms the
 * transmit timers and - for ordinary sockets - runs feature negotiation
 * init, allocates the optional ack vector and both CCID control blocks.
 *
 * Returns 0 on success or a negative errno; on failure all partially
 * allocated CCID/ackvec state is torn down again.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default until PMTU is known */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		/* If either CCID failed, undo everything allocated above. */
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}
252
253 EXPORT_SYMBOL_GPL(dccp_init_sock);
254
/*
 * dccp_destroy_sock - release all DCCP-private resources of @sk.
 *
 * Frees the pending retransmit skb, releases the bind bucket, the service
 * list, the ack vector and both CCID blocks, and clears feature
 * negotiation state.  Always returns 0 (sk_prot->destroy signature).
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	/* ccid_hc_*_delete() accept NULL, so no guards are needed here. */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}
289
290 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291
292 static inline int dccp_listen_start(struct sock *sk, int backlog)
293 {
294         struct dccp_sock *dp = dccp_sk(sk);
295
296         dp->dccps_role = DCCP_ROLE_LISTEN;
297         return inet_csk_listen_start(sk, backlog);
298 }
299
300 static inline int dccp_need_reset(int state)
301 {
302         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303                state != DCCP_REQUESTING;
304 }
305
/*
 * dccp_disconnect - abort the current connection and reset @sk for reuse.
 * @sk:    socket to disconnect (caller holds the socket lock)
 * @flags: unused here, kept for the sk_prot->disconnect signature
 *
 * Sends a Reset(Aborted) when the connection had progressed far enough to
 * need one, purges queues, stops timers and clears addressing/state so the
 * socket can be connected again.  Returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		/* Request still outstanding: no Reset needed, just the error. */
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Drop the source address too, unless the user bound it explicitly. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound socket must still own its bind bucket at this point. */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
352
353 EXPORT_SYMBOL_GPL(dccp_disconnect);
354
355 /*
356  *      Wait for a DCCP event.
357  *
358  *      Note that we don't need to lock the socket, as the upper poll layers
359  *      take care of normal races (between the test and the event) and we don't
360  *      go look at any of the socket buffers directly.
361  */
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
410
411 EXPORT_SYMBOL_GPL(dccp_poll);
412
413 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
414 {
415         int rc = -ENOTCONN;
416
417         lock_sock(sk);
418
419         if (sk->sk_state == DCCP_LISTEN)
420                 goto out;
421
422         switch (cmd) {
423         case SIOCINQ: {
424                 struct sk_buff *skb;
425                 unsigned long amount = 0;
426
427                 skb = skb_peek(&sk->sk_receive_queue);
428                 if (skb != NULL) {
429                         /*
430                          * We will only return the amount of this packet since
431                          * that is all that will be read.
432                          */
433                         amount = skb->len;
434                 }
435                 rc = put_user(amount, (int __user *)arg);
436         }
437                 break;
438         default:
439                 rc = -ENOIOCTLCMD;
440                 break;
441         }
442 out:
443         release_sock(sk);
444         return rc;
445 }
446
447 EXPORT_SYMBOL_GPL(dccp_ioctl);
448
/*
 * dccp_setsockopt_service - handle DCCP_SOCKOPT_SERVICE.
 * @sk:      socket being configured
 * @service: primary (first) service code, already copied in by the caller
 * @optval:  user buffer; any bytes beyond the first u32 are additional codes
 * @optlen:  total user buffer length in bytes
 *
 * Stores the primary service code and, if more codes were supplied, an
 * allocated dccp_service_list replacing any previous one.  Returns 0,
 * -EINVAL (invalid code or oversized list), -ENOMEM or -EFAULT.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* One u32 of @optlen is the primary code; the rest form
		 * the list.  The allocation above is sized so that
		 * dccpsl_nr entries fit behind the dccpsl_nr field. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
483
484 /* byte 1 is feature.  the rest is the preference list */
485 static int dccp_setsockopt_change(struct sock *sk, int type,
486                                   struct dccp_so_feat __user *optval)
487 {
488         struct dccp_so_feat opt;
489         u8 *val;
490         int rc;
491
492         if (copy_from_user(&opt, optval, sizeof(opt)))
493                 return -EFAULT;
494
495         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496         if (!val)
497                 return -ENOMEM;
498
499         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500                 rc = -EFAULT;
501                 goto out_free_val;
502         }
503
504         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505                               val, opt.dccpsf_len, GFP_KERNEL);
506         if (rc)
507                 goto out_free_val;
508
509 out:
510         return rc;
511
512 out_free_val:
513         kfree(val);
514         goto out;
515 }
516
/*
 * do_dccp_setsockopt - SOL_DCCP setsockopt worker.
 *
 * DCCP_SOCKOPT_SERVICE is dispatched before taking the socket lock since
 * its helper does its own user-copying and locking; all other options are
 * handled under lock_sock().  Returns 0 or a negative errno.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		/* Checksum coverage values occupy 4 bits on the wire. */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
577
578 int dccp_setsockopt(struct sock *sk, int level, int optname,
579                     char __user *optval, int optlen)
580 {
581         if (level != SOL_DCCP)
582                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
583                                                              optname, optval,
584                                                              optlen);
585         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
586 }
587
588 EXPORT_SYMBOL_GPL(dccp_setsockopt);
589
590 #ifdef CONFIG_COMPAT
591 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
592                            char __user *optval, int optlen)
593 {
594         if (level != SOL_DCCP)
595                 return inet_csk_compat_setsockopt(sk, level, optname,
596                                                   optval, optlen);
597         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
598 }
599
600 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
601 #endif
602
603 static int dccp_getsockopt_service(struct sock *sk, int len,
604                                    __be32 __user *optval,
605                                    int __user *optlen)
606 {
607         const struct dccp_sock *dp = dccp_sk(sk);
608         const struct dccp_service_list *sl;
609         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
610
611         lock_sock(sk);
612         if ((sl = dp->dccps_service_list) != NULL) {
613                 slen = sl->dccpsl_nr * sizeof(u32);
614                 total_len += slen;
615         }
616
617         err = -EINVAL;
618         if (total_len > len)
619                 goto out;
620
621         err = 0;
622         if (put_user(total_len, optlen) ||
623             put_user(dp->dccps_service, optval) ||
624             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
625                 err = -EFAULT;
626 out:
627         release_sock(sk);
628         return err;
629 }
630
/*
 * do_dccp_getsockopt - SOL_DCCP getsockopt worker.
 *
 * Option ranges 128-191 and 192-255 are reserved for the RX and TX CCIDs
 * respectively and are forwarded to the active CCID module.  Returns 0 or
 * a negative errno.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;	/* deprecated: report success, write nothing */
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Common exit for the simple integer-valued options above. */
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
679
680 int dccp_getsockopt(struct sock *sk, int level, int optname,
681                     char __user *optval, int __user *optlen)
682 {
683         if (level != SOL_DCCP)
684                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
685                                                              optname, optval,
686                                                              optlen);
687         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
688 }
689
690 EXPORT_SYMBOL_GPL(dccp_getsockopt);
691
692 #ifdef CONFIG_COMPAT
693 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
694                            char __user *optval, int __user *optlen)
695 {
696         if (level != SOL_DCCP)
697                 return inet_csk_compat_getsockopt(sk, level, optname,
698                                                   optval, optlen);
699         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
700 }
701
702 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
703 #endif
704
/*
 * dccp_sendmsg - queue one datagram for transmission.
 *
 * Each sendmsg() maps to exactly one DCCP packet, so @len may not exceed
 * the cached MSS.  The skb is appended to sk_write_queue and the xmit
 * machinery is kicked; actual sending is governed by the CCID.
 * Returns @len on success or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* Bound the tx backlog (sysctl_dccp_tx_qlen == 0 means unlimited). */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock while (possibly) sleeping in the allocator. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;	/* rc set by sock_alloc_send_skb() */

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;	/* rc == 0 means success: report bytes queued */
out_discard:
	kfree_skb(skb);
	goto out_release;
}
758
759 EXPORT_SYMBOL_GPL(dccp_sendmsg);
760
/*
 * dccp_recvmsg - receive one datagram.
 *
 * Loops peeking at the head of the receive queue: data packets are copied
 * out (one packet per call, excess is truncated with MSG_TRUNC);
 * Close/CloseReq trigger the passive-close path and read as EOF; other
 * packet types are discarded.  Blocks per the socket's receive timeout
 * unless @nonblock.  Returns the number of bytes copied, 0 at EOF, or a
 * negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Peer is closing: react once (not when peeking). */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet: drop it and keep scanning. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		/* Queue empty: decide between EOF, error, and waiting. */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
864
865 EXPORT_SYMBOL_GPL(dccp_recvmsg);
866
/*
 * inet_dccp_listen - listen() implementation for DCCP sockets.
 * @sock:    socket (must be unconnected and of type SOCK_DCCP)
 * @backlog: requested accept-queue length
 *
 * Moves a closed socket into the listening state; on an already-listening
 * socket only the backlog is adjusted.  Returns 0 or a negative errno.
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
902
903 EXPORT_SYMBOL_GPL(inet_dccp_listen);
904
/*
 * Initiate the state-dependent part of closing a connection: either finish
 * a passive close already in progress, or actively send a Close packet and
 * move to the matching closing state. Called from dccp_close() with the
 * socket locked.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer initiated the close; just complete our side of it. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* Active close: emit a Close (active=1) towards the peer. */
		dccp_send_close(sk, 1);

		/*
		 * A server waits for the client's Close in ACTIVE_CLOSEREQ;
		 * a client waits for the Reset in CLOSING.
		 */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		/* All other states go straight to DCCP_CLOSED. */
		dccp_set_state(sk, next_state);
	}
}
930
/*
 * dccp_close  -  close a DCCP socket from process context
 * @sk:      socket to close
 * @timeout: how long to linger waiting for the close handshake (jiffies)
 *
 * Flushes unread receive data (sending an Aborted reset if any was
 * discarded), runs the state-dependent termination, then orphans the
 * socket and hands it to the protocol/timers for final destruction.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	/* Block (up to @timeout) until the close handshake progresses. */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
1009
1010 EXPORT_SYMBOL_GPL(dccp_close);
1011
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR direction mask from userspace
 *
 * NOTE(review): currently a no-op stub that only logs the request; no
 * shutdown state is recorded and no packet is sent.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1016
1017 EXPORT_SYMBOL_GPL(dccp_shutdown);
1018
1019 static int __init dccp_mib_init(void)
1020 {
1021         int rc = -ENOMEM;
1022
1023         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1024         if (dccp_statistics[0] == NULL)
1025                 goto out;
1026
1027         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1028         if (dccp_statistics[1] == NULL)
1029                 goto out_free_one;
1030
1031         rc = 0;
1032 out:
1033         return rc;
1034 out_free_one:
1035         free_percpu(dccp_statistics[0]);
1036         dccp_statistics[0] = NULL;
1037         goto out;
1038
1039 }
1040
1041 static void dccp_mib_exit(void)
1042 {
1043         free_percpu(dccp_statistics[0]);
1044         free_percpu(dccp_statistics[1]);
1045         dccp_statistics[0] = dccp_statistics[1] = NULL;
1046 }
1047
/* Boot/module parameter: override the established-hash sizing heuristic. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for dccp_pr_debug() output; read-only after load. */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1059
/*
 * Module init: size and allocate the established (ehash) and bind (bhash)
 * hash tables, then bring up the MIBs, ack-vector machinery and sysctls.
 * Error paths unwind in strict reverse order of acquisition.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries parameter overrides the heuristic. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose size reaches the goal. */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Retry at progressively smaller orders until allocation succeeds. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		/* Trim bucket count down to a power of two. */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	/* Each bucket has a live chain and a time-wait chain. */
	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Start the bind hash at the same order as the established hash. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		/* Cap the bind hash at 64K buckets; shrink and retry. */
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;

}
1166
/*
 * Module exit: tear down everything dccp_init() set up, in reverse order.
 *
 * NOTE(review): the page orders are recomputed here via get_order() from
 * the bucket counts, while dccp_init() allocated using the loop-derived
 * orders; the two should agree, but this relies on the sizing arithmetic
 * matching — verify if the sizing code is ever changed.
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1181
/* Module entry/exit hooks and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");