dccp: Tidy up setsockopt calls
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU SNMP counters for the DCCP MIB (DCCP_INC_STATS et al. below). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Sockets closed by their owner but still completing protocol shutdown. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Bind/established/listening hash tables shared by DCCPv4 and DCCPv6. */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
/**
 * dccp_init_sock  -  initialise the DCCP-specific state of a socket
 * @sk: socket being set up
 * @ctl_sock_initialized: zero only while the protocol's own control socket
 *	is being created; non-zero for every ordinary socket
 *
 * Sets timers and default connection parameters, and for ordinary sockets
 * runs feature negotiation setup and allocates the ack vector and the
 * RX/TX CCID control blocks.  Returns 0 on success, the error from
 * dccp_feat_init(), or -ENOMEM on allocation failure.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        /* conservative initial MSS — presumably refined later via
         * icsk_sync_mss/dccp_sync_mss; confirm */
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        INIT_LIST_HEAD(&dp->dccps_featneg);
        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(sk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        /* roll back whichever of the allocations succeeded
                         * (the delete/free helpers are also called with
                         * possibly-NULL pointers elsewhere in this file) */
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
240
/**
 * dccp_destroy_sock  -  release the DCCP-specific resources of a socket
 * @sk: socket being destroyed
 *
 * Counterpart of dccp_init_sock(): frees the pending retransmit skb, the
 * bound port, the service list, the ack vector, both CCID blocks and any
 * outstanding feature-negotiation entries.
 */
void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        /* kfree(NULL) is a no-op, so no need to test the pointer */
        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
/**
 * dccp_disconnect  -  abort the connection and reset the socket for reuse
 * @sk:    socket to disconnect
 * @flags: unused here; kept for the protocol-operations interface
 *
 * Sends a Reset (Code 2, "Aborted") where the state requires it, purges
 * all queues, clears timers and address/port state.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);

        /* drop anything still queued in either direction */
        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        /* keep the source address only if the user explicitly bound it */
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
343
/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* fully shut down or closed: report hangup */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
401
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
403 {
404         int rc = -ENOTCONN;
405
406         lock_sock(sk);
407
408         if (sk->sk_state == DCCP_LISTEN)
409                 goto out;
410
411         switch (cmd) {
412         case SIOCINQ: {
413                 struct sk_buff *skb;
414                 unsigned long amount = 0;
415
416                 skb = skb_peek(&sk->sk_receive_queue);
417                 if (skb != NULL) {
418                         /*
419                          * We will only return the amount of this packet since
420                          * that is all that will be read.
421                          */
422                         amount = skb->len;
423                 }
424                 rc = put_user(amount, (int __user *)arg);
425         }
426                 break;
427         default:
428                 rc = -ENOIOCTLCMD;
429                 break;
430         }
431 out:
432         release_sock(sk);
433         return rc;
434 }
435
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
437
/*
 * dccp_setsockopt_service  -  install the service code(s) for this socket
 * @service: first service code (already read in by the caller)
 * @optval:  user buffer holding the complete __be32 service-code array
 * @optlen:  length of that buffer in bytes
 *
 * The first code goes into dccps_service; any additional codes replace
 * the previous dccps_service_list.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                /* number of codes beyond the first one */
                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        /* NOTE(review): an invalid value inside the list is
                         * reported as -EFAULT rather than -EINVAL — confirm
                         * this is the intended errno */
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        /* kfree(NULL) is a no-op, so no need to test the old pointer */
        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
472
/*
 * dccp_setsockopt_cscov  -  register a Minimum Checksum Coverage preference
 * @cscov: requested coverage (0..15), per RFC 4340, 9.2
 * @rx:    true => receive direction (RECV_CSCOV), false => send (SEND_CSCOV)
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
        u8 *list, len;
        int i, rc;

        if (cscov < 0 || cscov > 15)
                return -EINVAL;
        /*
         * Populate a list of permissible values, in the range cscov...15. This
         * is necessary since feature negotiation of single values only works if
         * both sides incidentally choose the same value. Since the list starts
         * lowest-value first, negotiation will pick the smallest shared value.
         */
        if (cscov == 0)
                return 0;
        len = 16 - cscov;

        list = kmalloc(len, GFP_KERNEL);
        if (list == NULL)
                return -ENOBUFS;

        for (i = 0; i < len; i++)
                list[i] = cscov++;

        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

        if (rc == 0) {
                /*
                 * NOTE(review): the fill loop above has advanced cscov to 16
                 * by this point, so pcrlen/pcslen receives 16 rather than the
                 * user-supplied value — looks unintended; confirm against the
                 * feature-negotiation semantics before relying on it.
                 */
                if (rx)
                        dccp_sk(sk)->dccps_pcrlen = cscov;
                else
                        dccp_sk(sk)->dccps_pcslen = cscov;
        }
        kfree(list);
        return rc;
}
508
509 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
510                 char __user *optval, int optlen)
511 {
512         struct dccp_sock *dp = dccp_sk(sk);
513         int val, err = 0;
514
515         switch (optname) {
516         case DCCP_SOCKOPT_PACKET_SIZE:
517                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
518                 return 0;
519         case DCCP_SOCKOPT_CHANGE_L:
520         case DCCP_SOCKOPT_CHANGE_R:
521                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
522                 return 0;
523         }
524
525         if (optlen < (int)sizeof(int))
526                 return -EINVAL;
527
528         if (get_user(val, (int __user *)optval))
529                 return -EFAULT;
530
531         if (optname == DCCP_SOCKOPT_SERVICE)
532                 return dccp_setsockopt_service(sk, val, optval, optlen);
533
534         lock_sock(sk);
535         switch (optname) {
536         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
537                 if (dp->dccps_role != DCCP_ROLE_SERVER)
538                         err = -EOPNOTSUPP;
539                 else
540                         dp->dccps_server_timewait = (val != 0);
541                 break;
542         case DCCP_SOCKOPT_SEND_CSCOV:
543                 err = dccp_setsockopt_cscov(sk, val, false);
544                 break;
545         case DCCP_SOCKOPT_RECV_CSCOV:
546                 err = dccp_setsockopt_cscov(sk, val, true);
547                 break;
548         default:
549                 err = -ENOPROTOOPT;
550                 break;
551         }
552         release_sock(sk);
553
554         return err;
555 }
556
557 int dccp_setsockopt(struct sock *sk, int level, int optname,
558                     char __user *optval, int optlen)
559 {
560         if (level != SOL_DCCP)
561                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
562                                                              optname, optval,
563                                                              optlen);
564         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
565 }
566
567 EXPORT_SYMBOL_GPL(dccp_setsockopt);
568
#ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_setsockopt(). */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
581
582 static int dccp_getsockopt_service(struct sock *sk, int len,
583                                    __be32 __user *optval,
584                                    int __user *optlen)
585 {
586         const struct dccp_sock *dp = dccp_sk(sk);
587         const struct dccp_service_list *sl;
588         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
589
590         lock_sock(sk);
591         if ((sl = dp->dccps_service_list) != NULL) {
592                 slen = sl->dccpsl_nr * sizeof(u32);
593                 total_len += slen;
594         }
595
596         err = -EINVAL;
597         if (total_len > len)
598                 goto out;
599
600         err = 0;
601         if (put_user(total_len, optlen) ||
602             put_user(dp->dccps_service, optval) ||
603             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
604                 err = -EFAULT;
605 out:
606         release_sock(sk);
607         return err;
608 }
609
/* SOL_DCCP-level getsockopt backend; option numbers live in dccp.h. */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        /* every scalar option below needs at least an int's worth of room */
        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                /* this option range is forwarded to the RX CCID ... */
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                /* ... and this one to the TX CCID */
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        /* common exit path for the scalar (int-valued) options above */
        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}
661
662 int dccp_getsockopt(struct sock *sk, int level, int optname,
663                     char __user *optval, int __user *optlen)
664 {
665         if (level != SOL_DCCP)
666                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
667                                                              optname, optval,
668                                                              optlen);
669         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
670 }
671
672 EXPORT_SYMBOL_GPL(dccp_getsockopt);
673
#ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_getsockopt(). */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
686
/**
 * dccp_sendmsg  -  queue one datagram for transmission
 *
 * DCCP is datagram-based, so @len must fit into a single packet
 * (bounded by dccps_mss_cache).  Returns @len on success or a
 * negative errno (-EMSGSIZE, -EAGAIN when the tx queue is full, or
 * an error from waiting/allocation/copy-in).
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* honour the configurable tx queue limit (0 == unlimited) */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works
         * (presumably it keys off sk_write_pending — see that function).
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /*
         * The socket lock is dropped around the (possibly sleeping)
         * allocation.  NOTE(review): the tx_qlen check above is not redone
         * after the lock is re-taken, so concurrent senders could overshoot
         * the limit — confirm whether that is acceptable here.
         */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        return rc ? : len;	/* rc == 0 on all successful paths */
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
742
/**
 * dccp_recvmsg  -  receive one DCCP datagram
 *
 * Walks the receive queue: Data/DataAck packets are copied to the user
 * (setting MSG_TRUNC if the buffer is smaller than the packet);
 * Close/CloseReq/Reset end the read with a return value of 0; any other
 * packet type is eaten and the loop continues.  Blocks (subject to the
 * receive timeout) while the queue is empty.  Returns the number of
 * bytes copied, 0 on connection end, or a negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* not user-visible: eat it and keep scanning */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* sleeps until data arrives or the timeout expires */
                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
848
849 int inet_dccp_listen(struct socket *sock, int backlog)
850 {
851         struct sock *sk = sock->sk;
852         unsigned char old_state;
853         int err;
854
855         lock_sock(sk);
856
857         err = -EINVAL;
858         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
859                 goto out;
860
861         old_state = sk->sk_state;
862         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
863                 goto out;
864
865         /* Really, if the socket is already in listen state
866          * we can only allow the backlog to be adjusted.
867          */
868         if (old_state != DCCP_LISTEN) {
869                 /*
870                  * FIXME: here it probably should be sk->sk_prot->listen_start
871                  * see tcp_listen_start
872                  */
873                 err = dccp_listen_start(sk, backlog);
874                 if (err)
875                         goto out;
876         }
877         sk->sk_max_ack_backlog = backlog;
878         err = 0;
879
880 out:
881         release_sock(sk);
882         return err;
883 }
884
885 EXPORT_SYMBOL_GPL(inet_dccp_listen);
886
/*
 * dccp_terminate_connection  -  start active connection teardown
 *
 * Chooses the appropriate closing action based on the current state and
 * then moves the socket to the resulting state.  Note the deliberate
 * fallthroughs: PARTOPEN continues into OPEN, and OPEN continues into
 * the default case so the computed next_state is applied.
 */
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                /* peer already initiated teardown; just finish it */
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                /* servers not using timewait hand it off via CloseReq */
                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}
913
/*
 * dccp_close  -  close a DCCP socket (sk->sk_prot->close)
 * @sk:      socket being closed
 * @timeout: how long to wait for the close handshake (SO_LINGER time)
 *
 * Flushes unread receive data (aborting the connection if any was found),
 * initiates connection termination, then orphans the socket and hands it
 * over to the protocol for the remainder of the close sequence.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case: a listening socket has no connection to
                 * terminate, only pending child requests to drop. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        /* Wait (up to the linger timeout) for the close handshake. */
        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
994
/*
 * dccp_shutdown  -  sk->sk_prot->shutdown handler
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR direction from userspace
 *
 * Currently only logs the request; no half-close processing is performed
 * here (connection teardown happens via dccp_close()).
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1001
/* Allocate the per-CPU DCCP SNMP (MIB) counters; returns 0 or -ENOMEM. */
static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1006
/* Release the per-CPU DCCP SNMP (MIB) counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void**)dccp_statistics);
}
1011
/* Optional override for the established-hash size (0 = auto-size in
 * dccp_init() from available memory); read-only module parameter. */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime-toggleable debug switch backing dccp_pr_debug() output. */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1023
1024 static int __init dccp_init(void)
1025 {
1026         unsigned long goal;
1027         int ehash_order, bhash_order, i;
1028         int rc = -ENOBUFS;
1029
1030         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1031                      FIELD_SIZEOF(struct sk_buff, cb));
1032
1033         dccp_hashinfo.bind_bucket_cachep =
1034                 kmem_cache_create("dccp_bind_bucket",
1035                                   sizeof(struct inet_bind_bucket), 0,
1036                                   SLAB_HWCACHE_ALIGN, NULL);
1037         if (!dccp_hashinfo.bind_bucket_cachep)
1038                 goto out;
1039
1040         /*
1041          * Size and allocate the main established and bind bucket
1042          * hash tables.
1043          *
1044          * The methodology is similar to that of the buffer cache.
1045          */
1046         if (num_physpages >= (128 * 1024))
1047                 goal = num_physpages >> (21 - PAGE_SHIFT);
1048         else
1049                 goal = num_physpages >> (23 - PAGE_SHIFT);
1050
1051         if (thash_entries)
1052                 goal = (thash_entries *
1053                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1054         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1055                 ;
1056         do {
1057                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1058                                         sizeof(struct inet_ehash_bucket);
1059                 while (dccp_hashinfo.ehash_size &
1060                        (dccp_hashinfo.ehash_size - 1))
1061                         dccp_hashinfo.ehash_size--;
1062                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1063                         __get_free_pages(GFP_ATOMIC, ehash_order);
1064         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1065
1066         if (!dccp_hashinfo.ehash) {
1067                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1068                 goto out_free_bind_bucket_cachep;
1069         }
1070
1071         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1072                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1073                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1074         }
1075
1076         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1077                         goto out_free_dccp_ehash;
1078
1079         bhash_order = ehash_order;
1080
1081         do {
1082                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1083                                         sizeof(struct inet_bind_hashbucket);
1084                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1085                     bhash_order > 0)
1086                         continue;
1087                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1088                         __get_free_pages(GFP_ATOMIC, bhash_order);
1089         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1090
1091         if (!dccp_hashinfo.bhash) {
1092                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1093                 goto out_free_dccp_locks;
1094         }
1095
1096         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1097                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1098                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1099         }
1100
1101         rc = dccp_mib_init();
1102         if (rc)
1103                 goto out_free_dccp_bhash;
1104
1105         rc = dccp_ackvec_init();
1106         if (rc)
1107                 goto out_free_dccp_mib;
1108
1109         rc = dccp_sysctl_init();
1110         if (rc)
1111                 goto out_ackvec_exit;
1112
1113         dccp_timestamping_init();
1114 out:
1115         return rc;
1116 out_ackvec_exit:
1117         dccp_ackvec_exit();
1118 out_free_dccp_mib:
1119         dccp_mib_exit();
1120 out_free_dccp_bhash:
1121         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1122         dccp_hashinfo.bhash = NULL;
1123 out_free_dccp_locks:
1124         inet_ehash_locks_free(&dccp_hashinfo);
1125 out_free_dccp_ehash:
1126         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1127         dccp_hashinfo.ehash = NULL;
1128 out_free_bind_bucket_cachep:
1129         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1130         dccp_hashinfo.bind_bucket_cachep = NULL;
1131         goto out;
1132 }
1133
/*
 * Module teardown: releases everything dccp_init() acquired, in roughly
 * reverse order (MIB counters, bhash/ehash tables and their locks, the
 * bind-bucket slab cache, ack-vector state and sysctl entries).
 */
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        /* Recompute the allocation orders from the stored table sizes. */
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
1148
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");