dccp: Integration of dynamic feature activation - part 1 (socket setup)
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU SNMP MIB counters for DCCP (see RFC 4340) */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Sockets closed by the user but not yet fully destroyed */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global hash tables for bound/established DCCP sockets */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
51
/*
 * dccp_set_state  -  Transition the socket to a new state
 * @sk:    socket being updated
 * @state: DCCP_* state to enter
 *
 * Maintains the CURRESTAB/ESTABRESETS MIB counters across transitions
 * involving DCCP_OPEN, and unhashes the socket (releasing the local port
 * unless the user locked it) when entering DCCP_CLOSED.  sk_state itself
 * is written last, so a closed socket is never visible in the hash tables.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* release the port unless the user bound it explicitly */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 static void dccp_finish_passive_close(struct sock *sk)
93 {
94         switch (sk->sk_state) {
95         case DCCP_PASSIVE_CLOSE:
96                 /* Node (client or server) has received Close packet. */
97                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
98                 dccp_set_state(sk, DCCP_CLOSED);
99                 break;
100         case DCCP_PASSIVE_CLOSEREQ:
101                 /*
102                  * Client received CloseReq. We set the `active' flag so that
103                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
104                  */
105                 dccp_send_close(sk, 1);
106                 dccp_set_state(sk, DCCP_CLOSING);
107         }
108 }
109
110 void dccp_done(struct sock *sk)
111 {
112         dccp_set_state(sk, DCCP_CLOSED);
113         dccp_clear_xmit_timers(sk);
114
115         sk->sk_shutdown = SHUTDOWN_MASK;
116
117         if (!sock_flag(sk, SOCK_DEAD))
118                 sk->sk_state_change(sk);
119         else
120                 inet_csk_destroy_sock(sk);
121 }
122
123 EXPORT_SYMBOL_GPL(dccp_done);
124
125 const char *dccp_packet_name(const int type)
126 {
127         static const char *dccp_packet_names[] = {
128                 [DCCP_PKT_REQUEST]  = "REQUEST",
129                 [DCCP_PKT_RESPONSE] = "RESPONSE",
130                 [DCCP_PKT_DATA]     = "DATA",
131                 [DCCP_PKT_ACK]      = "ACK",
132                 [DCCP_PKT_DATAACK]  = "DATAACK",
133                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
134                 [DCCP_PKT_CLOSE]    = "CLOSE",
135                 [DCCP_PKT_RESET]    = "RESET",
136                 [DCCP_PKT_SYNC]     = "SYNC",
137                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
138         };
139
140         if (type >= DCCP_NR_PKT_TYPES)
141                 return "INVALID";
142         else
143                 return dccp_packet_names[type];
144 }
145
146 EXPORT_SYMBOL_GPL(dccp_packet_name);
147
148 const char *dccp_state_name(const int state)
149 {
150         static char *dccp_state_names[] = {
151         [DCCP_OPEN]             = "OPEN",
152         [DCCP_REQUESTING]       = "REQUESTING",
153         [DCCP_PARTOPEN]         = "PARTOPEN",
154         [DCCP_LISTEN]           = "LISTEN",
155         [DCCP_RESPOND]          = "RESPOND",
156         [DCCP_CLOSING]          = "CLOSING",
157         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
158         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
159         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
160         [DCCP_TIME_WAIT]        = "TIME_WAIT",
161         [DCCP_CLOSED]           = "CLOSED",
162         };
163
164         if (state >= DCCP_MAX_STATES)
165                 return "INVALID STATE!";
166         else
167                 return dccp_state_names[state];
168 }
169
170 EXPORT_SYMBOL_GPL(dccp_state_name);
171
172 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
173 {
174         struct dccp_sock *dp = dccp_sk(sk);
175         struct inet_connection_sock *icsk = inet_csk(sk);
176
177         dccp_minisock_init(&dp->dccps_minisock);
178
179         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
180         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
181         sk->sk_state            = DCCP_CLOSED;
182         sk->sk_write_space      = dccp_write_space;
183         icsk->icsk_sync_mss     = dccp_sync_mss;
184         dp->dccps_mss_cache     = 536;
185         dp->dccps_rate_last     = jiffies;
186         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
187         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
188         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
189
190         dccp_init_xmit_timers(sk);
191
192         INIT_LIST_HEAD(&dp->dccps_featneg);
193         /* control socket doesn't need feat nego */
194         if (likely(ctl_sock_initialized))
195                 return dccp_feat_init(sk);
196         return 0;
197 }
198
199 EXPORT_SYMBOL_GPL(dccp_init_sock);
200
/*
 * dccp_destroy_sock  -  Final per-socket resource release
 * @sk: socket being destroyed
 *
 * Frees the single retransmit skb, releases a still-held local port, frees
 * the service-code list, the Ack Vector (if the feature was enabled) and
 * both half-connection CCIDs, then purges any remaining feature-negotiation
 * entries.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	/* kfree(NULL) is a no-op, so the list pointer needs no test */
	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	/* the Ack Vector is only allocated when the feature is enabled */
	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
235
236 static inline int dccp_listen_start(struct sock *sk, int backlog)
237 {
238         struct dccp_sock *dp = dccp_sk(sk);
239
240         dp->dccps_role = DCCP_ROLE_LISTEN;
241         /* do not start to listen if feature negotiation setup fails */
242         if (dccp_feat_finalise_settings(dp))
243                 return -EPROTO;
244         return inet_csk_listen_start(sk, backlog);
245 }
246
247 static inline int dccp_need_reset(int state)
248 {
249         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
250                state != DCCP_REQUESTING;
251 }
252
/*
 * dccp_disconnect  -  Abort the connection and return the socket to CLOSED
 * @sk:    socket to disconnect
 * @flags: currently unused (kept for the protocol-interface signature)
 *
 * Corresponds to the ABORT function of RFC 793, sec. 3.8: a listening
 * socket has its accept queue flushed; a socket past the handshake sends a
 * Reset with Code 2, "Aborted".  All queues and timers are cleared so the
 * socket can be reused.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	/* discard everything received or still queued for transmission */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* forget the source address unless the user bound it explicitly */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* a socket holding a port must still hold its bind-bucket ref */
	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
303
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. not still in the REQUESTING/RESPOND handshake) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
361
362 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
363 {
364         int rc = -ENOTCONN;
365
366         lock_sock(sk);
367
368         if (sk->sk_state == DCCP_LISTEN)
369                 goto out;
370
371         switch (cmd) {
372         case SIOCINQ: {
373                 struct sk_buff *skb;
374                 unsigned long amount = 0;
375
376                 skb = skb_peek(&sk->sk_receive_queue);
377                 if (skb != NULL) {
378                         /*
379                          * We will only return the amount of this packet since
380                          * that is all that will be read.
381                          */
382                         amount = skb->len;
383                 }
384                 rc = put_user(amount, (int __user *)arg);
385         }
386                 break;
387         default:
388                 rc = -ENOIOCTLCMD;
389                 break;
390         }
391 out:
392         release_sock(sk);
393         return rc;
394 }
395
396 EXPORT_SYMBOL_GPL(dccp_ioctl);
397
398 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
399                                    char __user *optval, int optlen)
400 {
401         struct dccp_sock *dp = dccp_sk(sk);
402         struct dccp_service_list *sl = NULL;
403
404         if (service == DCCP_SERVICE_INVALID_VALUE ||
405             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
406                 return -EINVAL;
407
408         if (optlen > sizeof(service)) {
409                 sl = kmalloc(optlen, GFP_KERNEL);
410                 if (sl == NULL)
411                         return -ENOMEM;
412
413                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
414                 if (copy_from_user(sl->dccpsl_list,
415                                    optval + sizeof(service),
416                                    optlen - sizeof(service)) ||
417                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
418                         kfree(sl);
419                         return -EFAULT;
420                 }
421         }
422
423         lock_sock(sk);
424         dp->dccps_service = service;
425
426         kfree(dp->dccps_service_list);
427
428         dp->dccps_service_list = sl;
429         release_sock(sk);
430         return 0;
431 }
432
/*
 * dccp_setsockopt_cscov  -  Register a minimum checksum-coverage preference
 * @sk:    socket to configure
 * @cscov: requested coverage, 0..15 (0 = leave the default, full coverage)
 * @rx:    true for the RX direction, false for TX
 *
 * Returns 0 on success, -EINVAL for out-of-range input, -ENOBUFS on
 * allocation failure, or the error from dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/*
		 * NOTE(review): the loop above leaves cscov == 16, so the
		 * value stored below is 16, not the requested coverage.
		 * Confirm whether this is intended (e.g. overwritten later
		 * during feature activation) or should use the original
		 * cscov argument.
		 */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
468
469 static int dccp_setsockopt_ccid(struct sock *sk, int type,
470                                 char __user *optval, int optlen)
471 {
472         u8 *val;
473         int rc = 0;
474
475         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
476                 return -EINVAL;
477
478         val = kmalloc(optlen, GFP_KERNEL);
479         if (val == NULL)
480                 return -ENOMEM;
481
482         if (copy_from_user(val, optval, optlen)) {
483                 kfree(val);
484                 return -EFAULT;
485         }
486
487         lock_sock(sk);
488         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
489                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
490
491         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
492                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
493         release_sock(sk);
494
495         kfree(val);
496         return rc;
497 }
498
/*
 * do_dccp_setsockopt  -  Handle SOL_DCCP options (set direction)
 *
 * Deprecated options are accepted but ignored (return 0) so old binaries
 * keep working.  CCID and SERVICE options consume the raw optval buffer
 * themselves and are dispatched before the generic integer path, which
 * requires optlen >= sizeof(int).
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		/* copies its own argument and does its own locking */
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE re-reads the full optval buffer (list of service codes) */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
550
551 int dccp_setsockopt(struct sock *sk, int level, int optname,
552                     char __user *optval, int optlen)
553 {
554         if (level != SOL_DCCP)
555                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
556                                                              optname, optval,
557                                                              optlen);
558         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
559 }
560
561 EXPORT_SYMBOL_GPL(dccp_setsockopt);
562
563 #ifdef CONFIG_COMPAT
564 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
565                            char __user *optval, int optlen)
566 {
567         if (level != SOL_DCCP)
568                 return inet_csk_compat_setsockopt(sk, level, optname,
569                                                   optval, optlen);
570         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
571 }
572
573 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
574 #endif
575
576 static int dccp_getsockopt_service(struct sock *sk, int len,
577                                    __be32 __user *optval,
578                                    int __user *optlen)
579 {
580         const struct dccp_sock *dp = dccp_sk(sk);
581         const struct dccp_service_list *sl;
582         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
583
584         lock_sock(sk);
585         if ((sl = dp->dccps_service_list) != NULL) {
586                 slen = sl->dccpsl_nr * sizeof(u32);
587                 total_len += slen;
588         }
589
590         err = -EINVAL;
591         if (total_len > len)
592                 goto out;
593
594         err = 0;
595         if (put_user(total_len, optlen) ||
596             put_user(dp->dccps_service, optval) ||
597             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
598                 err = -EFAULT;
599 out:
600         release_sock(sk);
601         return err;
602 }
603
/*
 * do_dccp_getsockopt  -  Handle SOL_DCCP options (get direction)
 *
 * SERVICE and AVAILABLE_CCIDS write their own (variable-length) replies;
 * everything else returns a single int.  Option numbers 128..191 and
 * 192..255 are forwarded to the RX resp. TX CCID's private getsockopt.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		/* negative means no CCID is currently active */
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
665
666 int dccp_getsockopt(struct sock *sk, int level, int optname,
667                     char __user *optval, int __user *optlen)
668 {
669         if (level != SOL_DCCP)
670                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
671                                                              optname, optval,
672                                                              optlen);
673         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
674 }
675
676 EXPORT_SYMBOL_GPL(dccp_getsockopt);
677
678 #ifdef CONFIG_COMPAT
679 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
680                            char __user *optval, int __user *optlen)
681 {
682         if (level != SOL_DCCP)
683                 return inet_csk_compat_getsockopt(sk, level, optname,
684                                                   optval, optlen);
685         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
686 }
687
688 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
689 #endif
690
/*
 * dccp_sendmsg  -  Queue one datagram for transmission
 *
 * Messages larger than the current MPS are rejected with -EMSGSIZE rather
 * than fragmented.  If the handshake is not finished, waits (subject to the
 * socket send timeout) for the connection to reach (PART)OPEN.  The skb is
 * appended to sk_write_queue and dccp_write_xmit() is kicked.  Returns the
 * number of bytes queued, or a negative error.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* enforce the tx queue-length policy (0 means unlimited) */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/*
	 * NOTE(review): the socket lock is dropped around the (possibly
	 * sleeping) allocation, so the socket state may change in this
	 * window — confirm the state machine tolerates that.
	 */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
746
/*
 * dccp_recvmsg  -  Receive one datagram
 *
 * DCCP is datagram-oriented: only the packet at the head of the receive
 * queue is returned; if the user buffer is smaller, the excess is discarded
 * and MSG_TRUNC is set.  Close/CloseReq/Reset packets end reception
 * (returning 0); other non-data packet types are consumed silently.
 * Blocks per the socket receive timeout unless @nonblock is set.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* MSG_PEEK must not alter connection state */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* other packet types carry no user data: drop them */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* nothing usable queued yet: sleep until data arrives */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
852
853 int inet_dccp_listen(struct socket *sock, int backlog)
854 {
855         struct sock *sk = sock->sk;
856         unsigned char old_state;
857         int err;
858
859         lock_sock(sk);
860
861         err = -EINVAL;
862         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
863                 goto out;
864
865         old_state = sk->sk_state;
866         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
867                 goto out;
868
869         /* Really, if the socket is already in listen state
870          * we can only allow the backlog to be adjusted.
871          */
872         if (old_state != DCCP_LISTEN) {
873                 /*
874                  * FIXME: here it probably should be sk->sk_prot->listen_start
875                  * see tcp_listen_start
876                  */
877                 err = dccp_listen_start(sk, backlog);
878                 if (err)
879                         goto out;
880         }
881         sk->sk_max_ack_backlog = backlog;
882         err = 0;
883
884 out:
885         release_sock(sk);
886         return err;
887 }
888
889 EXPORT_SYMBOL_GPL(inet_dccp_listen);
890
/*
 * dccp_terminate_connection  -  Begin an orderly shutdown of the connection
 *
 * Passive-close states complete the close exchange already in progress.  A
 * (PART)OPEN socket actively sends a Close; a server not using the
 * server-timewait option moves to ACTIVE_CLOSEREQ (so the client holds
 * TIMEWAIT), otherwise to CLOSING.  All remaining states drop straight to
 * CLOSED via the default next_state.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
917
/*
 * Actively close a DCCP socket on descriptor close: flush any unread
 * receive data (aborting with a Reset if data is discarded), run the
 * closing handshake, then orphan the socket so protocol-level teardown
 * can complete after the owning process is gone.
 *
 * @sk:      socket being closed (unlocked on entry)
 * @timeout: how long to wait in sk_stream_wait_close() for in-flight
 *           data to drain before orphaning
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;	/* bytes dropped from the rx queue */
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: listeners just drop their accept queue. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/* Normal path: start the active-close state machine. */
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before dropping the socket lock. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	percpu_counter_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);	/* drop the reference taken by sock_hold() above */
}

EXPORT_SYMBOL_GPL(dccp_close);
998
/*
 * shutdown(2) handler.
 * NOTE(review): currently a stub - the 'how' argument is only logged and
 * no half-close is actually performed; confirm whether SEND_SHUTDOWN
 * handling is intentionally deferred.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1005
/* Allocate the per-CPU DCCP SNMP counters; returns 0 or a negative errno. */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1010
/* Release the per-CPU DCCP SNMP counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1015
/* Optional module parameter overriding the established-hash sizing below. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Runtime switch for dccp_pr_debug() output.
 * NOTE(review): declared 'int' but registered with module_param(..., bool, ...);
 * newer kernels require the variable itself to be bool - verify against the
 * target kernel, and note dccp.h declares it extern, so the type change must
 * be coordinated there.
 */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1027
/*
 * Module init: allocate the orphan counter, bind-bucket slab, the
 * established (ehash) and bind (bhash) hash tables, then bring up the
 * MIB, ack-vector, sysctl and timestamping subsystems.  Unwinds all
 * allocations via the goto-cleanup chain on any failure.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	/* dccp_skb_cb must fit in the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0);
	if (rc)
		goto out;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* thash_entries (module param) overrides the memory-based goal. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Retry at progressively smaller orders until allocation succeeds. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		/* Round the bucket count down to a power of two. */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Size bhash like ehash, but cap it at 64K buckets. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;	/* too big - retry at a lower order */
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
	goto out;
}
1143
1144 static void __exit dccp_fini(void)
1145 {
1146         dccp_mib_exit();
1147         free_pages((unsigned long)dccp_hashinfo.bhash,
1148                    get_order(dccp_hashinfo.bhash_size *
1149                              sizeof(struct inet_bind_hashbucket)));
1150         free_pages((unsigned long)dccp_hashinfo.ehash,
1151                    get_order(dccp_hashinfo.ehash_size *
1152                              sizeof(struct inet_ehash_bucket)));
1153         inet_ehash_locks_free(&dccp_hashinfo);
1154         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1155         dccp_ackvec_exit();
1156         dccp_sysctl_exit();
1157 }
1158
/* Module entry/exit points and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");