dccp ccid-2: Phase out the use of boolean Ack Vector sysctl
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU DCCP SNMP/MIB counters, updated via DCCP_INC_STATS and friends. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of sockets whose owning process has closed them while the
 * protocol-level shutdown handshake is still in progress. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Hash tables shared by all DCCP sockets (bound / established / listening).
 * Only the listening-hash members need compile-time initialisation here. */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/**
 * dccp_set_state  -  Transition a socket to a new DCCP state
 * @sk:    socket being updated
 * @state: new state (one of the DCCP_* state constants)
 *
 * Besides setting sk->sk_state, this keeps the CURRESTAB/ESTABRESETS
 * MIB counters consistent and, on entry to DCCP_CLOSED, unhashes the
 * socket and releases its local port binding.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		/* Leave the lookup tables; also drop the port unless the
		 * user explicitly locked the binding (SOCK_BINDPORT_LOCK). */
		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
97
98 static void dccp_finish_passive_close(struct sock *sk)
99 {
100         switch (sk->sk_state) {
101         case DCCP_PASSIVE_CLOSE:
102                 /* Node (client or server) has received Close packet. */
103                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
104                 dccp_set_state(sk, DCCP_CLOSED);
105                 break;
106         case DCCP_PASSIVE_CLOSEREQ:
107                 /*
108                  * Client received CloseReq. We set the `active' flag so that
109                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
110                  */
111                 dccp_send_close(sk, 1);
112                 dccp_set_state(sk, DCCP_CLOSING);
113         }
114 }
115
116 void dccp_done(struct sock *sk)
117 {
118         dccp_set_state(sk, DCCP_CLOSED);
119         dccp_clear_xmit_timers(sk);
120
121         sk->sk_shutdown = SHUTDOWN_MASK;
122
123         if (!sock_flag(sk, SOCK_DEAD))
124                 sk->sk_state_change(sk);
125         else
126                 inet_csk_destroy_sock(sk);
127 }
128
129 EXPORT_SYMBOL_GPL(dccp_done);
130
131 const char *dccp_packet_name(const int type)
132 {
133         static const char *dccp_packet_names[] = {
134                 [DCCP_PKT_REQUEST]  = "REQUEST",
135                 [DCCP_PKT_RESPONSE] = "RESPONSE",
136                 [DCCP_PKT_DATA]     = "DATA",
137                 [DCCP_PKT_ACK]      = "ACK",
138                 [DCCP_PKT_DATAACK]  = "DATAACK",
139                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140                 [DCCP_PKT_CLOSE]    = "CLOSE",
141                 [DCCP_PKT_RESET]    = "RESET",
142                 [DCCP_PKT_SYNC]     = "SYNC",
143                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
144         };
145
146         if (type >= DCCP_NR_PKT_TYPES)
147                 return "INVALID";
148         else
149                 return dccp_packet_names[type];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
153
154 const char *dccp_state_name(const int state)
155 {
156         static char *dccp_state_names[] = {
157         [DCCP_OPEN]             = "OPEN",
158         [DCCP_REQUESTING]       = "REQUESTING",
159         [DCCP_PARTOPEN]         = "PARTOPEN",
160         [DCCP_LISTEN]           = "LISTEN",
161         [DCCP_RESPOND]          = "RESPOND",
162         [DCCP_CLOSING]          = "CLOSING",
163         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
164         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
165         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166         [DCCP_TIME_WAIT]        = "TIME_WAIT",
167         [DCCP_CLOSED]           = "CLOSED",
168         };
169
170         if (state >= DCCP_MAX_STATES)
171                 return "INVALID STATE!";
172         else
173                 return dccp_state_names[state];
174 }
175
176 EXPORT_SYMBOL_GPL(dccp_state_name);
177
/**
 * dccp_init_sock  -  Initialise the DCCP-specific part of a new socket
 * @sk: socket to set up
 * @ctl_sock_initialized: zero only while creating the per-protocol
 *	control socket, which must not run feature negotiation
 *
 * Sets protocol defaults (initial RTO, retry count, MSS cache, Ack
 * Ratio of 1, absent service code), installs the write-space and MSS
 * callbacks and arms the transmit timers.
 *
 * Returns 0, or a negative error from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default until negotiated */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
206
/**
 * dccp_destroy_sock  -  Release all DCCP-private resources of a socket
 * @sk: socket being destroyed
 *
 * Frees the pending retransmit skb, returns the bind bucket, releases
 * the service list, the receiver Ack Vector and both CCID halves, and
 * finally purges any outstanding feature-negotiation entries.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	/* NULL the pointers after deletion so later code cannot use them */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
241 static inline int dccp_listen_start(struct sock *sk, int backlog)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244
245         dp->dccps_role = DCCP_ROLE_LISTEN;
246         /* do not start to listen if feature negotiation setup fails */
247         if (dccp_feat_finalise_settings(dp))
248                 return -EPROTO;
249         return inet_csk_listen_start(sk, backlog);
250 }
251
252 static inline int dccp_need_reset(int state)
253 {
254         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
255                state != DCCP_REQUESTING;
256 }
257
/**
 * dccp_disconnect  -  Abort the connection and reset the socket
 * @sk:    socket to disconnect
 * @flags: unused here (kept for the protocol-ops signature)
 *
 * Corresponds to the ABORT function of RFC 793 sec. 3.8: sends a Reset
 * with Code 2 ("Aborted") where required, flushes all queues, clears
 * timers and restores the socket to a CLOSED, reusable state.
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		/* No Reset needed: the handshake never completed. */
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	/* Drop everything queued in either direction, including the
	 * single retransmit skb held in sk_send_head. */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Keep the bound source address only if the user locked it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
308
309 /*
310  *      Wait for a DCCP event.
311  *
312  *      Note that we don't need to lock the socket, as the upper poll layers
313  *      take care of normal races (between the test and the event) and we don't
314  *      go look at any of the socket buffers directly.
315  */
/**
 * dccp_poll  -  poll()/select() support for DCCP sockets
 * @file: file the poll is performed on
 * @sock: socket being polled
 * @wait: poll table to register the wait queue with
 *
 * Returns the POLL* event mask for the socket. Runs without the socket
 * lock; see the comment below for why that is safe.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
366
/**
 * dccp_ioctl  -  Protocol-level ioctl handler
 * @sk:  socket the ioctl applies to
 * @cmd: ioctl command; only SIOCINQ is implemented
 * @arg: userspace pointer for the result
 *
 * SIOCINQ reports the length of the packet at the head of the receive
 * queue only (DCCP is datagram-based: a read never spans packets).
 * Returns -ENOTCONN on listening sockets, -ENOIOCTLCMD for unknown
 * commands, or the put_user() result.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
402
/**
 * dccp_setsockopt_service  -  Set the service code (and optional list)
 * @sk:      socket to configure
 * @service: primary service code (first u32 of the option value)
 * @optval:  full userspace option buffer; any further u32s form the
 *           additional service list
 * @optlen:  total option length in bytes
 *
 * Returns 0, -EINVAL on a bad value or oversized list, -ENOMEM, or
 * -EFAULT. NOTE(review): -EFAULT is also returned when the copied list
 * contains DCCP_SERVICE_INVALID_VALUE, not just on copy failure —
 * looks intentional here, but callers cannot distinguish the cases.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* First u32 is @service itself; the rest form the list. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace (and free) any previously installed list. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
437
/**
 * dccp_setsockopt_cscov  -  Register a minimum checksum-coverage value
 * @sk:    socket to configure
 * @cscov: requested coverage, 0..15 (0 = full coverage, no negotiation)
 * @rx:    true for the receive direction (RECV_CSCOV), false for send
 *
 * Registers the range cscov..15 as a server-priority feature list and,
 * on success, caches the local value in dccps_pcrlen/dccps_pcslen.
 * Returns 0, -EINVAL for out-of-range input, or -ENOBUFS.
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	/* dccp_feat_register_sp() copies the list; ours can go. */
	kfree(list);
	return rc;
}
473
/**
 * dccp_setsockopt_ccid  -  Register a CCID preference list
 * @sk:     socket to configure
 * @type:   DCCP_SOCKOPT_CCID (both directions), _TX_CCID or _RX_CCID
 * @optval: userspace array of preferred CCID numbers (u8 each)
 * @optlen: number of entries, 1..DCCP_FEAT_MAX_SP_VALS
 *
 * Copies the user's preference list and registers it with feature
 * negotiation for the TX and/or RX half as requested by @type.
 * Returns 0, -EINVAL, -ENOMEM, -EFAULT, or a dccp_feat_register_sp()
 * error.
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = kmalloc(optlen, GFP_KERNEL);
	if (val == NULL)
		return -ENOMEM;

	if (copy_from_user(val, optval, optlen)) {
		kfree(val);
		return -EFAULT;
	}

	lock_sock(sk);
	/* is_local=1 registers the TX half, is_local=0 the RX half. */
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
503
/**
 * do_dccp_setsockopt  -  Dispatch SOL_DCCP setsockopt requests
 * @sk:      socket being configured
 * @level:   always SOL_DCCP here (checked by the caller)
 * @optname: DCCP_SOCKOPT_* selector
 * @optval:  userspace option buffer
 * @optlen:  option length in bytes
 *
 * Deprecated options (PACKET_SIZE, CHANGE_L/R) are accepted but
 * ignored for backward compatibility. CCID and SERVICE options are
 * delegated to their dedicated helpers before the generic int-sized
 * option handling below.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	/* All remaining options carry at least one int. */
	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE reads the whole buffer itself; val is the first word. */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
555
556 int dccp_setsockopt(struct sock *sk, int level, int optname,
557                     char __user *optval, int optlen)
558 {
559         if (level != SOL_DCCP)
560                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
561                                                              optname, optval,
562                                                              optlen);
563         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
564 }
565
566 EXPORT_SYMBOL_GPL(dccp_setsockopt);
567
568 #ifdef CONFIG_COMPAT
569 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
570                            char __user *optval, int optlen)
571 {
572         if (level != SOL_DCCP)
573                 return inet_csk_compat_setsockopt(sk, level, optname,
574                                                   optval, optlen);
575         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
576 }
577
578 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
579 #endif
580
581 static int dccp_getsockopt_service(struct sock *sk, int len,
582                                    __be32 __user *optval,
583                                    int __user *optlen)
584 {
585         const struct dccp_sock *dp = dccp_sk(sk);
586         const struct dccp_service_list *sl;
587         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
588
589         lock_sock(sk);
590         if ((sl = dp->dccps_service_list) != NULL) {
591                 slen = sl->dccpsl_nr * sizeof(u32);
592                 total_len += slen;
593         }
594
595         err = -EINVAL;
596         if (total_len > len)
597                 goto out;
598
599         err = 0;
600         if (put_user(total_len, optlen) ||
601             put_user(dp->dccps_service, optval) ||
602             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
603                 err = -EFAULT;
604 out:
605         release_sock(sk);
606         return err;
607 }
608
/**
 * do_dccp_getsockopt  -  Dispatch SOL_DCCP getsockopt requests
 * @sk:      socket to query
 * @level:   always SOL_DCCP here (checked by the caller)
 * @optname: DCCP_SOCKOPT_* selector; ranges 128-191 and 192-255 are
 *           forwarded to the RX and TX CCID modules respectively
 * @optval:  userspace result buffer
 * @optlen:  in: buffer size; out: bytes written
 *
 * Most options return a single int; SERVICE and AVAILABLE_CCIDS have
 * variable-length replies handled by their own helpers.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		/* Negative value means no CCID currently active. */
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
670
671 int dccp_getsockopt(struct sock *sk, int level, int optname,
672                     char __user *optval, int __user *optlen)
673 {
674         if (level != SOL_DCCP)
675                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
676                                                              optname, optval,
677                                                              optlen);
678         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
679 }
680
681 EXPORT_SYMBOL_GPL(dccp_getsockopt);
682
683 #ifdef CONFIG_COMPAT
684 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
685                            char __user *optval, int __user *optlen)
686 {
687         if (level != SOL_DCCP)
688                 return inet_csk_compat_getsockopt(sk, level, optname,
689                                                   optval, optlen);
690         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
691 }
692
693 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
694 #endif
695
/**
 * dccp_sendmsg  -  Queue one datagram for transmission
 * @iocb: kiocb (unused here)
 * @sk:   sending socket
 * @msg:  user message; must fit into one packet (<= MSS cache)
 * @len:  payload length in bytes
 *
 * Enforces the sysctl_dccp_tx_qlen backlog limit, waits for the
 * connection to reach OPEN/PARTOPEN if necessary, copies the payload
 * into a freshly allocated skb and hands it to dccp_write_xmit().
 * Returns @len on success or a negative error.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* DCCP never fragments: one sendmsg == one packet. */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* NOTE(review): the lock is dropped so a blocking allocation can
	 * sleep; the socket state is not re-validated after re-locking —
	 * presumably tolerated downstream, but worth confirming. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
751
/**
 * dccp_recvmsg  -  Receive one datagram from the socket
 * @iocb:     kiocb (unused here)
 * @sk:       receiving socket
 * @msg:      destination message header
 * @len:      size of the user's buffer
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_* flags; MSG_PEEK leaves the packet queued
 * @addr_len: unused (connection-oriented protocol)
 *
 * Loops over the receive queue: data packets are copied out (truncated
 * to @len with MSG_TRUNC set if too large), Close/CloseReq trigger the
 * passive-close handling, Resets terminate the read with 0, and any
 * other packet type is silently consumed. Blocks via sk_wait_data()
 * until data, shutdown, error, timeout or a signal.
 *
 * Returns the number of bytes copied, 0 on EOF-like conditions, or a
 * negative error.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet: drop it and look at the next one. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		/* Queue empty: decide whether to return or to sleep. */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
857
858 int inet_dccp_listen(struct socket *sock, int backlog)
859 {
860         struct sock *sk = sock->sk;
861         unsigned char old_state;
862         int err;
863
864         lock_sock(sk);
865
866         err = -EINVAL;
867         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
868                 goto out;
869
870         old_state = sk->sk_state;
871         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
872                 goto out;
873
874         /* Really, if the socket is already in listen state
875          * we can only allow the backlog to be adjusted.
876          */
877         if (old_state != DCCP_LISTEN) {
878                 /*
879                  * FIXME: here it probably should be sk->sk_prot->listen_start
880                  * see tcp_listen_start
881                  */
882                 err = dccp_listen_start(sk, backlog);
883                 if (err)
884                         goto out;
885         }
886         sk->sk_max_ack_backlog = backlog;
887         err = 0;
888
889 out:
890         release_sock(sk);
891         return err;
892 }
893
894 EXPORT_SYMBOL_GPL(inet_dccp_listen);
895
/*
 * dccp_terminate_connection  -  Initiate orderly connection shutdown
 *
 * Chooses the termination action based on the current state: passive
 * closes are completed, an active Close is sent from PARTOPEN/OPEN
 * (stopping the PARTOPEN delayed-ack timer first), and the socket then
 * moves to CLOSEREQ-wait, CLOSING, or straight to CLOSED. Note the
 * deliberate fallthroughs: PARTOPEN falls into OPEN, and OPEN falls
 * into the default state-setting case.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		/* A server without SERVER_TIMEWAIT asks the client to hold
		 * the TIMEWAIT state via CloseReq; otherwise we wait here. */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
922
/*
 * dccp_close  -  close a DCCP socket (struct proto .close handler)
 * @sk:      socket to close
 * @timeout: time (jiffies) to wait for the close handshake to make
 *           progress before giving up (SO_LINGER-style wait)
 *
 * Modelled on tcp_close(): flushes unread receive data, initiates the
 * closing handshake (or aborts with a Reset if data was discarded),
 * orphans the socket, and finally destroys it once it has reached
 * DCCP_CLOSED.  Called in process context with the socket unlocked.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	/* No further data may be sent or received on this socket. */
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: tear down the accept queue as well. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	/* Wait (up to @timeout) for the send queue to drain. */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);		/* keep sk alive across the orphaning below */
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
1001
1002 EXPORT_SYMBOL_GPL(dccp_close);
1003
/*
 * dccp_shutdown  -  struct proto .shutdown handler
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector from the shutdown(2) call
 *
 * Intentionally a no-op apart from the debug trace: half-close
 * semantics are not implemented here, so @how is ignored and no state
 * change or packet exchange takes place.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1008
1009 EXPORT_SYMBOL_GPL(dccp_shutdown);
1010
/* Allocate the per-CPU SNMP counter array backing dccp_statistics. */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1015
/* Release the per-CPU SNMP counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1020
1021 static int thash_entries;
1022 module_param(thash_entries, int, 0444);
1023 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1024
1025 #ifdef CONFIG_IP_DCCP_DEBUG
1026 int dccp_debug;
1027 module_param(dccp_debug, bool, 0644);
1028 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1029
1030 EXPORT_SYMBOL_GPL(dccp_debug);
1031 #endif
1032
1033 static int __init dccp_init(void)
1034 {
1035         unsigned long goal;
1036         int ehash_order, bhash_order, i;
1037         int rc = -ENOBUFS;
1038
1039         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1040                      FIELD_SIZEOF(struct sk_buff, cb));
1041
1042         dccp_hashinfo.bind_bucket_cachep =
1043                 kmem_cache_create("dccp_bind_bucket",
1044                                   sizeof(struct inet_bind_bucket), 0,
1045                                   SLAB_HWCACHE_ALIGN, NULL);
1046         if (!dccp_hashinfo.bind_bucket_cachep)
1047                 goto out;
1048
1049         /*
1050          * Size and allocate the main established and bind bucket
1051          * hash tables.
1052          *
1053          * The methodology is similar to that of the buffer cache.
1054          */
1055         if (num_physpages >= (128 * 1024))
1056                 goal = num_physpages >> (21 - PAGE_SHIFT);
1057         else
1058                 goal = num_physpages >> (23 - PAGE_SHIFT);
1059
1060         if (thash_entries)
1061                 goal = (thash_entries *
1062                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1063         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1064                 ;
1065         do {
1066                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1067                                         sizeof(struct inet_ehash_bucket);
1068                 while (dccp_hashinfo.ehash_size &
1069                        (dccp_hashinfo.ehash_size - 1))
1070                         dccp_hashinfo.ehash_size--;
1071                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1072                         __get_free_pages(GFP_ATOMIC, ehash_order);
1073         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1074
1075         if (!dccp_hashinfo.ehash) {
1076                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1077                 goto out_free_bind_bucket_cachep;
1078         }
1079
1080         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1081                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1082                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1083         }
1084
1085         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1086                         goto out_free_dccp_ehash;
1087
1088         bhash_order = ehash_order;
1089
1090         do {
1091                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1092                                         sizeof(struct inet_bind_hashbucket);
1093                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1094                     bhash_order > 0)
1095                         continue;
1096                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1097                         __get_free_pages(GFP_ATOMIC, bhash_order);
1098         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1099
1100         if (!dccp_hashinfo.bhash) {
1101                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1102                 goto out_free_dccp_locks;
1103         }
1104
1105         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1106                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1107                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1108         }
1109
1110         rc = dccp_mib_init();
1111         if (rc)
1112                 goto out_free_dccp_bhash;
1113
1114         rc = dccp_ackvec_init();
1115         if (rc)
1116                 goto out_free_dccp_mib;
1117
1118         rc = dccp_sysctl_init();
1119         if (rc)
1120                 goto out_ackvec_exit;
1121
1122         dccp_timestamping_init();
1123 out:
1124         return rc;
1125 out_ackvec_exit:
1126         dccp_ackvec_exit();
1127 out_free_dccp_mib:
1128         dccp_mib_exit();
1129 out_free_dccp_bhash:
1130         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1131         dccp_hashinfo.bhash = NULL;
1132 out_free_dccp_locks:
1133         inet_ehash_locks_free(&dccp_hashinfo);
1134 out_free_dccp_ehash:
1135         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1136         dccp_hashinfo.ehash = NULL;
1137 out_free_bind_bucket_cachep:
1138         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1139         dccp_hashinfo.bind_bucket_cachep = NULL;
1140         goto out;
1141 }
1142
/*
 * dccp_fini  -  module exit: release everything dccp_init() set up
 *
 * Teardown mirrors the successful init path in reverse.  The page
 * orders to free are recomputed here from the final table sizes;
 * NOTE(review): this matches the orders actually allocated only while
 * each bucket struct size is a power of two -- confirm on any config
 * where it is not.
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1157
1158 module_init(dccp_init);
1159 module_exit(dccp_fini);
1160
1161 MODULE_LICENSE("GPL");
1162 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1163 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");