dccp: Implement both feature-local and feature-remote Sequence Window feature
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU DCCP MIB counters (incremented via DCCP_INC_STATS and friends) */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Number of orphaned DCCP sockets (owner closed the fd, protocol still active) */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Shared listen/bind/established hash tables for all DCCP sockets */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/**
 * dccp_set_state  -  Transition a socket to a new DCCP state
 * @sk:    socket being updated
 * @state: target state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * Keeps the CURRESTAB/ESTABRESETS MIB counters in sync with the transition
 * and, when entering DCCP_CLOSED, unhashes the socket and releases its
 * bound port (unless the user pinned it via SOCK_BINDPORT_LOCK).
 */
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);     /* a transition must change state */

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                /* Leaving OPEN for any state decrements the established count */
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
97
/*
 * dccp_finish_passive_close  -  Complete a close initiated by the peer
 * @sk: socket in DCCP_PASSIVE_CLOSE or DCCP_PASSIVE_CLOSEREQ
 *
 * No default case on purpose: in any other state there is nothing to finish.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}
115
116 void dccp_done(struct sock *sk)
117 {
118         dccp_set_state(sk, DCCP_CLOSED);
119         dccp_clear_xmit_timers(sk);
120
121         sk->sk_shutdown = SHUTDOWN_MASK;
122
123         if (!sock_flag(sk, SOCK_DEAD))
124                 sk->sk_state_change(sk);
125         else
126                 inet_csk_destroy_sock(sk);
127 }
128
129 EXPORT_SYMBOL_GPL(dccp_done);
130
131 const char *dccp_packet_name(const int type)
132 {
133         static const char *dccp_packet_names[] = {
134                 [DCCP_PKT_REQUEST]  = "REQUEST",
135                 [DCCP_PKT_RESPONSE] = "RESPONSE",
136                 [DCCP_PKT_DATA]     = "DATA",
137                 [DCCP_PKT_ACK]      = "ACK",
138                 [DCCP_PKT_DATAACK]  = "DATAACK",
139                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140                 [DCCP_PKT_CLOSE]    = "CLOSE",
141                 [DCCP_PKT_RESET]    = "RESET",
142                 [DCCP_PKT_SYNC]     = "SYNC",
143                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
144         };
145
146         if (type >= DCCP_NR_PKT_TYPES)
147                 return "INVALID";
148         else
149                 return dccp_packet_names[type];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
153
154 const char *dccp_state_name(const int state)
155 {
156         static char *dccp_state_names[] = {
157         [DCCP_OPEN]             = "OPEN",
158         [DCCP_REQUESTING]       = "REQUESTING",
159         [DCCP_PARTOPEN]         = "PARTOPEN",
160         [DCCP_LISTEN]           = "LISTEN",
161         [DCCP_RESPOND]          = "RESPOND",
162         [DCCP_CLOSING]          = "CLOSING",
163         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
164         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
165         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166         [DCCP_TIME_WAIT]        = "TIME_WAIT",
167         [DCCP_CLOSED]           = "CLOSED",
168         };
169
170         if (state >= DCCP_MAX_STATES)
171                 return "INVALID STATE!";
172         else
173                 return dccp_state_names[state];
174 }
175
176 EXPORT_SYMBOL_GPL(dccp_state_name);
177
178 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
179 {
180         struct dccp_sock *dp = dccp_sk(sk);
181         struct inet_connection_sock *icsk = inet_csk(sk);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         INIT_LIST_HEAD(&dp->dccps_featneg);
197         /* control socket doesn't need feat nego */
198         if (likely(ctl_sock_initialized))
199                 return dccp_feat_init(sk);
200         return 0;
201 }
202
203 EXPORT_SYMBOL_GPL(dccp_init_sock);
204
/**
 * dccp_destroy_sock  -  Final per-socket cleanup before the socket is freed
 * @sk: socket being destroyed
 *
 * Releases every resource this file attaches to a DCCP socket: the pending
 * retransmit skb, the bind bucket, the service list, the ack vector and
 * both half-connection CCIDs, plus any feature-negotiation state.
 */
void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        /* Service list exists only if setsockopt(DCCP_SOCKOPT_SERVICE) set it */
        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dp->dccps_hc_rx_ackvec != NULL) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        /* Tear down both CCIDs, then clear the pointers so no later path
         * can touch the freed objects */
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
238
239 static inline int dccp_listen_start(struct sock *sk, int backlog)
240 {
241         struct dccp_sock *dp = dccp_sk(sk);
242
243         dp->dccps_role = DCCP_ROLE_LISTEN;
244         /* do not start to listen if feature negotiation setup fails */
245         if (dccp_feat_finalise_settings(dp))
246                 return -EPROTO;
247         return inet_csk_listen_start(sk, backlog);
248 }
249
250 static inline int dccp_need_reset(int state)
251 {
252         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253                state != DCCP_REQUESTING;
254 }
255
/**
 * dccp_disconnect  -  Abort the connection and return the socket to CLOSED
 * @sk:    DCCP socket
 * @flags: unused here; kept for the protocol-interface signature
 *
 * Always returns 0 (err is never changed in this function).
 */
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                /* Handshake never completed: nothing on the wire, but the
                 * local caller still observes ECONNRESET */
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);

        /* Drop all queued data in both directions, including the pending
         * retransmit skb (DCCP keeps it in sk_send_head, not the queue) */
        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        /* Forget the source address unless the user explicitly bound it */
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        /* A socket holding a local port must still own its bind bucket */
        WARN_ON(inet->num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
306
/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? (i.e. past the REQUESTING/RESPOND handshake states) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
364
365 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
366 {
367         int rc = -ENOTCONN;
368
369         lock_sock(sk);
370
371         if (sk->sk_state == DCCP_LISTEN)
372                 goto out;
373
374         switch (cmd) {
375         case SIOCINQ: {
376                 struct sk_buff *skb;
377                 unsigned long amount = 0;
378
379                 skb = skb_peek(&sk->sk_receive_queue);
380                 if (skb != NULL) {
381                         /*
382                          * We will only return the amount of this packet since
383                          * that is all that will be read.
384                          */
385                         amount = skb->len;
386                 }
387                 rc = put_user(amount, (int __user *)arg);
388         }
389                 break;
390         default:
391                 rc = -ENOIOCTLCMD;
392                 break;
393         }
394 out:
395         release_sock(sk);
396         return rc;
397 }
398
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
400
/*
 * dccp_setsockopt_service  -  Install the service code(s) for this socket
 * @service: primary service code (first u32 of optval, already copied in
 *           by the caller)
 * @optval/@optlen: full user buffer; any u32s beyond the first form an
 *           additional service list
 *
 * NOTE(review): a list containing DCCP_SERVICE_INVALID_VALUE is reported
 * as -EFAULT rather than -EINVAL — looks intentional but odd; confirm
 * before changing.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                /* entries after the primary service code */
                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        /* replace any previously installed list */
        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
435
436 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
437 {
438         u8 *list, len;
439         int i, rc;
440
441         if (cscov < 0 || cscov > 15)
442                 return -EINVAL;
443         /*
444          * Populate a list of permissible values, in the range cscov...15. This
445          * is necessary since feature negotiation of single values only works if
446          * both sides incidentally choose the same value. Since the list starts
447          * lowest-value first, negotiation will pick the smallest shared value.
448          */
449         if (cscov == 0)
450                 return 0;
451         len = 16 - cscov;
452
453         list = kmalloc(len, GFP_KERNEL);
454         if (list == NULL)
455                 return -ENOBUFS;
456
457         for (i = 0; i < len; i++)
458                 list[i] = cscov++;
459
460         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
461
462         if (rc == 0) {
463                 if (rx)
464                         dccp_sk(sk)->dccps_pcrlen = cscov;
465                 else
466                         dccp_sk(sk)->dccps_pcslen = cscov;
467         }
468         kfree(list);
469         return rc;
470 }
471
472 static int dccp_setsockopt_ccid(struct sock *sk, int type,
473                                 char __user *optval, int optlen)
474 {
475         u8 *val;
476         int rc = 0;
477
478         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
479                 return -EINVAL;
480
481         val = kmalloc(optlen, GFP_KERNEL);
482         if (val == NULL)
483                 return -ENOMEM;
484
485         if (copy_from_user(val, optval, optlen)) {
486                 kfree(val);
487                 return -EFAULT;
488         }
489
490         lock_sock(sk);
491         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
492                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
493
494         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
495                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
496         release_sock(sk);
497
498         kfree(val);
499         return rc;
500 }
501
/*
 * do_dccp_setsockopt  -  Dispatch SOL_DCCP socket options
 *
 * CCID options are handled before the generic integer decode because they
 * take a byte list, not an int. All remaining options require at least an
 * int-sized optval.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CHANGE_L:
        case DCCP_SOCKOPT_CHANGE_R:
                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CCID:
        case DCCP_SOCKOPT_RX_CCID:
        case DCCP_SOCKOPT_TX_CCID:
                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
        }

        if (optlen < (int)sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        /* SERVICE reads the rest of optval itself (service list) */
        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                /* only meaningful for the server end of a connection */
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, false);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, true);
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);

        return err;
}
553
554 int dccp_setsockopt(struct sock *sk, int level, int optname,
555                     char __user *optval, int optlen)
556 {
557         if (level != SOL_DCCP)
558                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
559                                                              optname, optval,
560                                                              optlen);
561         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
562 }
563
564 EXPORT_SYMBOL_GPL(dccp_setsockopt);
565
566 #ifdef CONFIG_COMPAT
567 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
568                            char __user *optval, int optlen)
569 {
570         if (level != SOL_DCCP)
571                 return inet_csk_compat_setsockopt(sk, level, optname,
572                                                   optval, optlen);
573         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
574 }
575
576 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
577 #endif
578
579 static int dccp_getsockopt_service(struct sock *sk, int len,
580                                    __be32 __user *optval,
581                                    int __user *optlen)
582 {
583         const struct dccp_sock *dp = dccp_sk(sk);
584         const struct dccp_service_list *sl;
585         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
586
587         lock_sock(sk);
588         if ((sl = dp->dccps_service_list) != NULL) {
589                 slen = sl->dccpsl_nr * sizeof(u32);
590                 total_len += slen;
591         }
592
593         err = -EINVAL;
594         if (total_len > len)
595                 goto out;
596
597         err = 0;
598         if (put_user(total_len, optlen) ||
599             put_user(dp->dccps_service, optval) ||
600             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
601                 err = -EFAULT;
602 out:
603         release_sock(sk);
604         return err;
605 }
606
/*
 * do_dccp_getsockopt  -  Dispatch SOL_DCCP getsockopt requests
 *
 * Option numbers 128..191 are forwarded to the RX CCID and 192..255 to the
 * TX CCID; everything else is handled inline and returned as an int.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_TX_CCID:
                /* negative means no TX CCID is currently loaded */
                val = ccid_get_current_tx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_RX_CCID:
                val = ccid_get_current_rx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        /* integer-valued options all exit through here */
        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}
668
669 int dccp_getsockopt(struct sock *sk, int level, int optname,
670                     char __user *optval, int __user *optlen)
671 {
672         if (level != SOL_DCCP)
673                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
674                                                              optname, optval,
675                                                              optlen);
676         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
677 }
678
679 EXPORT_SYMBOL_GPL(dccp_getsockopt);
680
681 #ifdef CONFIG_COMPAT
682 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
683                            char __user *optval, int __user *optlen)
684 {
685         if (level != SOL_DCCP)
686                 return inet_csk_compat_getsockopt(sk, level, optname,
687                                                   optval, optlen);
688         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
689 }
690
691 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
692 #endif
693
/**
 * dccp_sendmsg  -  Queue one datagram for transmission
 *
 * DCCP is datagram-oriented: a message longer than the current MPS is
 * rejected with -EMSGSIZE rather than fragmented. On success the full
 * message length is returned.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* Enforce the tx queue limit (sysctl value 0 means unlimited) */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process (which
         * depends on sk_write_pending) keeps working.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* NOTE(review): the socket lock is dropped around the allocation, so
         * sk_state may have changed by the time we relock — confirm that
         * queueing in a non-OPEN state here is tolerated downstream. */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
749
/**
 * dccp_recvmsg  -  Receive one datagram (or connection-teardown event)
 *
 * Loops over the head of the receive queue: data packets are delivered to
 * the user, Close/CloseReq/Reset terminate the read with length 0, and any
 * other packet type is discarded. Blocks (subject to the receive timeout)
 * while the queue is empty.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* MSG_PEEK must not trigger the close handshake */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* non-data packet: drop it and look at the next one */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* Nothing usable queued: sleep until data arrives */
                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
855
856 int inet_dccp_listen(struct socket *sock, int backlog)
857 {
858         struct sock *sk = sock->sk;
859         unsigned char old_state;
860         int err;
861
862         lock_sock(sk);
863
864         err = -EINVAL;
865         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
866                 goto out;
867
868         old_state = sk->sk_state;
869         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
870                 goto out;
871
872         /* Really, if the socket is already in listen state
873          * we can only allow the backlog to be adjusted.
874          */
875         if (old_state != DCCP_LISTEN) {
876                 /*
877                  * FIXME: here it probably should be sk->sk_prot->listen_start
878                  * see tcp_listen_start
879                  */
880                 err = dccp_listen_start(sk, backlog);
881                 if (err)
882                         goto out;
883         }
884         sk->sk_max_ack_backlog = backlog;
885         err = 0;
886
887 out:
888         release_sock(sk);
889         return err;
890 }
891
892 EXPORT_SYMBOL_GPL(inet_dccp_listen);
893
/*
 * dccp_terminate_connection  -  Start active close of the connection
 *
 * The switch deliberately falls through: PARTOPEN first cancels its delayed
 * ACK timer, then OPEN/PARTOPEN send a Close and pick the follow-up state,
 * and finally every path lands in the default case which commits the state
 * chosen in next_state.
 */
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                /* A server not lingering in TIMEWAIT expects the client to
                 * close, hence ACTIVE_CLOSEREQ; otherwise we enter CLOSING */
                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}
920
/*
 * dccp_close  -  close a DCCP socket (user-level close(2)/shutdown path)
 * @sk:      socket being closed
 * @timeout: time to wait for the close handshake (SO_LINGER style)
 *
 * Terminates the connection as appropriate for the current state,
 * orphans the socket and, if it has already reached DCCP_CLOSED,
 * destroys it; otherwise the protocol keeps a reference until the
 * close sequence completes.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: a listening socket has no connection to
		 * terminate, but pending child sockets must be torn down. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	/* Wait (up to @timeout) for the close handshake to make progress. */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
1001
/*
 * Half-close (shutdown(2)) is not implemented for DCCP: the request is
 * only logged here, and the socket's behaviour is unchanged.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1008
/* Allocate the per-CPU DCCP SNMP (MIB) counters; returns 0 or -ENOMEM. */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1013
/* Release the per-CPU DCCP SNMP (MIB) counters. */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1018
/* Number of established-hash buckets; 0 lets dccp_init() size the table
 * from available memory. Read-only module parameter (mode 0444). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for dccp_pr_debug() output (writable, mode 0644). */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1030
1031 static int __init dccp_init(void)
1032 {
1033         unsigned long goal;
1034         int ehash_order, bhash_order, i;
1035         int rc = -ENOBUFS;
1036
1037         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1038                      FIELD_SIZEOF(struct sk_buff, cb));
1039
1040         dccp_hashinfo.bind_bucket_cachep =
1041                 kmem_cache_create("dccp_bind_bucket",
1042                                   sizeof(struct inet_bind_bucket), 0,
1043                                   SLAB_HWCACHE_ALIGN, NULL);
1044         if (!dccp_hashinfo.bind_bucket_cachep)
1045                 goto out;
1046
1047         /*
1048          * Size and allocate the main established and bind bucket
1049          * hash tables.
1050          *
1051          * The methodology is similar to that of the buffer cache.
1052          */
1053         if (num_physpages >= (128 * 1024))
1054                 goal = num_physpages >> (21 - PAGE_SHIFT);
1055         else
1056                 goal = num_physpages >> (23 - PAGE_SHIFT);
1057
1058         if (thash_entries)
1059                 goal = (thash_entries *
1060                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1061         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1062                 ;
1063         do {
1064                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1065                                         sizeof(struct inet_ehash_bucket);
1066                 while (dccp_hashinfo.ehash_size &
1067                        (dccp_hashinfo.ehash_size - 1))
1068                         dccp_hashinfo.ehash_size--;
1069                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1070                         __get_free_pages(GFP_ATOMIC, ehash_order);
1071         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1072
1073         if (!dccp_hashinfo.ehash) {
1074                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1075                 goto out_free_bind_bucket_cachep;
1076         }
1077
1078         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1079                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1080                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1081         }
1082
1083         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1084                         goto out_free_dccp_ehash;
1085
1086         bhash_order = ehash_order;
1087
1088         do {
1089                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1090                                         sizeof(struct inet_bind_hashbucket);
1091                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1092                     bhash_order > 0)
1093                         continue;
1094                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1095                         __get_free_pages(GFP_ATOMIC, bhash_order);
1096         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1097
1098         if (!dccp_hashinfo.bhash) {
1099                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1100                 goto out_free_dccp_locks;
1101         }
1102
1103         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1104                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1105                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1106         }
1107
1108         rc = dccp_mib_init();
1109         if (rc)
1110                 goto out_free_dccp_bhash;
1111
1112         rc = dccp_ackvec_init();
1113         if (rc)
1114                 goto out_free_dccp_mib;
1115
1116         rc = dccp_sysctl_init();
1117         if (rc)
1118                 goto out_ackvec_exit;
1119
1120         dccp_timestamping_init();
1121 out:
1122         return rc;
1123 out_ackvec_exit:
1124         dccp_ackvec_exit();
1125 out_free_dccp_mib:
1126         dccp_mib_exit();
1127 out_free_dccp_bhash:
1128         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1129         dccp_hashinfo.bhash = NULL;
1130 out_free_dccp_locks:
1131         inet_ehash_locks_free(&dccp_hashinfo);
1132 out_free_dccp_ehash:
1133         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1134         dccp_hashinfo.ehash = NULL;
1135 out_free_bind_bucket_cachep:
1136         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1137         dccp_hashinfo.bind_bucket_cachep = NULL;
1138         goto out;
1139 }
1140
/*
 * dccp_fini  -  module teardown; releases everything dccp_init() set up.
 * The page orders for the two hash tables are recomputed from the stored
 * bucket counts, mirroring the allocation sizes chosen in dccp_init().
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1155
1156 module_init(dccp_init);
1157 module_exit(dccp_fini);
1158
1159 MODULE_LICENSE("GPL");
1160 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1161 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");