dccp: Set per-connection CCIDs via socket options
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Storage for the DCCP MIB counters (struct dccp_mib), marked read-mostly. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Atomic counter starting at zero.
 * NOTE(review): presumably the number of orphaned DCCP sockets; the users
 * are outside this file section - confirm.
 */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Hash-table bookkeeping for DCCP sockets. Only the listening-hash lock,
 * its user count and its wait queue are initialised statically here; any
 * other members are left zeroed by the initializer.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum TX queue length, in packets; 0 means no limit.
 * dccp_sendmsg() returns -EAGAIN once sk_write_queue holds this many packets.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         INIT_LIST_HEAD(&dp->dccps_featneg);
197         /*
198          * FIXME: We're hardcoding the CCID, and doing this at this point makes
199          * the listening (master) sock get CCID control blocks, which is not
200          * necessary, but for now, to not mess with the test userspace apps,
201          * lets leave it here, later the real solution is to do this in a
202          * setsockopt(CCIDs-I-want/accept). -acme
203          */
204         if (likely(ctl_sock_initialized)) {
205                 int rc = dccp_feat_init(sk);
206
207                 if (rc)
208                         return rc;
209
210                 if (dmsk->dccpms_send_ack_vector) {
211                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212                         if (dp->dccps_hc_rx_ackvec == NULL)
213                                 return -ENOMEM;
214                 }
215                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
216                                                       sk, GFP_KERNEL);
217                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
218                                                       sk, GFP_KERNEL);
219                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220                              dp->dccps_hc_tx_ccid == NULL)) {
221                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223                         if (dmsk->dccpms_send_ack_vector) {
224                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225                                 dp->dccps_hc_rx_ackvec = NULL;
226                         }
227                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228                         return -ENOMEM;
229                 }
230         } else {
231                 /* control socket doesn't need feat nego */
232                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
234         }
235
236         return 0;
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
240
241 void dccp_destroy_sock(struct sock *sk)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244         struct dccp_minisock *dmsk = dccp_msk(sk);
245
246         /*
247          * DCCP doesn't use sk_write_queue, just sk_send_head
248          * for retransmissions
249          */
250         if (sk->sk_send_head != NULL) {
251                 kfree_skb(sk->sk_send_head);
252                 sk->sk_send_head = NULL;
253         }
254
255         /* Clean up a referenced DCCP bind bucket. */
256         if (inet_csk(sk)->icsk_bind_hash != NULL)
257                 inet_put_port(sk);
258
259         kfree(dp->dccps_service_list);
260         dp->dccps_service_list = NULL;
261
262         if (dmsk->dccpms_send_ack_vector) {
263                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264                 dp->dccps_hc_rx_ackvec = NULL;
265         }
266         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
269
270         /* clean up feature negotiation state */
271         dccp_feat_list_purge(&dp->dccps_featneg);
272 }
273
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
293 int dccp_disconnect(struct sock *sk, int flags)
294 {
295         struct inet_connection_sock *icsk = inet_csk(sk);
296         struct inet_sock *inet = inet_sk(sk);
297         int err = 0;
298         const int old_state = sk->sk_state;
299
300         if (old_state != DCCP_CLOSED)
301                 dccp_set_state(sk, DCCP_CLOSED);
302
303         /*
304          * This corresponds to the ABORT function of RFC793, sec. 3.8
305          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
306          */
307         if (old_state == DCCP_LISTEN) {
308                 inet_csk_listen_stop(sk);
309         } else if (dccp_need_reset(old_state)) {
310                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
311                 sk->sk_err = ECONNRESET;
312         } else if (old_state == DCCP_REQUESTING)
313                 sk->sk_err = ECONNRESET;
314
315         dccp_clear_xmit_timers(sk);
316
317         __skb_queue_purge(&sk->sk_receive_queue);
318         __skb_queue_purge(&sk->sk_write_queue);
319         if (sk->sk_send_head != NULL) {
320                 __kfree_skb(sk->sk_send_head);
321                 sk->sk_send_head = NULL;
322         }
323
324         inet->dport = 0;
325
326         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
327                 inet_reset_saddr(sk);
328
329         sk->sk_shutdown = 0;
330         sock_reset_flag(sk, SOCK_DONE);
331
332         icsk->icsk_backoff = 0;
333         inet_csk_delack_init(sk);
334         __sk_dst_reset(sk);
335
336         WARN_ON(inet->num && !icsk->icsk_bind_hash);
337
338         sk->sk_error_report(sk);
339         return err;
340 }
341
342 EXPORT_SYMBOL_GPL(dccp_disconnect);
343
344 /*
345  *      Wait for a DCCP event.
346  *
347  *      Note that we don't need to lock the socket, as the upper poll layers
348  *      take care of normal races (between the test and the event) and we don't
349  *      go look at any of the socket buffers directly.
350  */
351 unsigned int dccp_poll(struct file *file, struct socket *sock,
352                        poll_table *wait)
353 {
354         unsigned int mask;
355         struct sock *sk = sock->sk;
356
357         poll_wait(file, sk->sk_sleep, wait);
358         if (sk->sk_state == DCCP_LISTEN)
359                 return inet_csk_listen_poll(sk);
360
361         /* Socket is not locked. We are protected from async events
362            by poll logic and correct handling of state changes
363            made by another threads is impossible in any case.
364          */
365
366         mask = 0;
367         if (sk->sk_err)
368                 mask = POLLERR;
369
370         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
371                 mask |= POLLHUP;
372         if (sk->sk_shutdown & RCV_SHUTDOWN)
373                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
374
375         /* Connected? */
376         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
377                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
378                         mask |= POLLIN | POLLRDNORM;
379
380                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
381                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
382                                 mask |= POLLOUT | POLLWRNORM;
383                         } else {  /* send SIGIO later */
384                                 set_bit(SOCK_ASYNC_NOSPACE,
385                                         &sk->sk_socket->flags);
386                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
387
388                                 /* Race breaker. If space is freed after
389                                  * wspace test but before the flags are set,
390                                  * IO signal will be lost.
391                                  */
392                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
393                                         mask |= POLLOUT | POLLWRNORM;
394                         }
395                 }
396         }
397         return mask;
398 }
399
400 EXPORT_SYMBOL_GPL(dccp_poll);
401
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
403 {
404         int rc = -ENOTCONN;
405
406         lock_sock(sk);
407
408         if (sk->sk_state == DCCP_LISTEN)
409                 goto out;
410
411         switch (cmd) {
412         case SIOCINQ: {
413                 struct sk_buff *skb;
414                 unsigned long amount = 0;
415
416                 skb = skb_peek(&sk->sk_receive_queue);
417                 if (skb != NULL) {
418                         /*
419                          * We will only return the amount of this packet since
420                          * that is all that will be read.
421                          */
422                         amount = skb->len;
423                 }
424                 rc = put_user(amount, (int __user *)arg);
425         }
426                 break;
427         default:
428                 rc = -ENOIOCTLCMD;
429                 break;
430         }
431 out:
432         release_sock(sk);
433         return rc;
434 }
435
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
437
438 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
439                                    char __user *optval, int optlen)
440 {
441         struct dccp_sock *dp = dccp_sk(sk);
442         struct dccp_service_list *sl = NULL;
443
444         if (service == DCCP_SERVICE_INVALID_VALUE ||
445             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
446                 return -EINVAL;
447
448         if (optlen > sizeof(service)) {
449                 sl = kmalloc(optlen, GFP_KERNEL);
450                 if (sl == NULL)
451                         return -ENOMEM;
452
453                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
454                 if (copy_from_user(sl->dccpsl_list,
455                                    optval + sizeof(service),
456                                    optlen - sizeof(service)) ||
457                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
458                         kfree(sl);
459                         return -EFAULT;
460                 }
461         }
462
463         lock_sock(sk);
464         dp->dccps_service = service;
465
466         kfree(dp->dccps_service_list);
467
468         dp->dccps_service_list = sl;
469         release_sock(sk);
470         return 0;
471 }
472
473 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
474 {
475         u8 *list, len;
476         int i, rc;
477
478         if (cscov < 0 || cscov > 15)
479                 return -EINVAL;
480         /*
481          * Populate a list of permissible values, in the range cscov...15. This
482          * is necessary since feature negotiation of single values only works if
483          * both sides incidentally choose the same value. Since the list starts
484          * lowest-value first, negotiation will pick the smallest shared value.
485          */
486         if (cscov == 0)
487                 return 0;
488         len = 16 - cscov;
489
490         list = kmalloc(len, GFP_KERNEL);
491         if (list == NULL)
492                 return -ENOBUFS;
493
494         for (i = 0; i < len; i++)
495                 list[i] = cscov++;
496
497         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
498
499         if (rc == 0) {
500                 if (rx)
501                         dccp_sk(sk)->dccps_pcrlen = cscov;
502                 else
503                         dccp_sk(sk)->dccps_pcslen = cscov;
504         }
505         kfree(list);
506         return rc;
507 }
508
509 static int dccp_setsockopt_ccid(struct sock *sk, int type,
510                                 char __user *optval, int optlen)
511 {
512         u8 *val;
513         int rc = 0;
514
515         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
516                 return -EINVAL;
517
518         val = kmalloc(optlen, GFP_KERNEL);
519         if (val == NULL)
520                 return -ENOMEM;
521
522         if (copy_from_user(val, optval, optlen)) {
523                 kfree(val);
524                 return -EFAULT;
525         }
526
527         lock_sock(sk);
528         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
529                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
530
531         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
532                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
533         release_sock(sk);
534
535         kfree(val);
536         return rc;
537 }
538
539 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
540                 char __user *optval, int optlen)
541 {
542         struct dccp_sock *dp = dccp_sk(sk);
543         int val, err = 0;
544
545         switch (optname) {
546         case DCCP_SOCKOPT_PACKET_SIZE:
547                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
548                 return 0;
549         case DCCP_SOCKOPT_CHANGE_L:
550         case DCCP_SOCKOPT_CHANGE_R:
551                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
552                 return 0;
553         case DCCP_SOCKOPT_CCID:
554         case DCCP_SOCKOPT_RX_CCID:
555         case DCCP_SOCKOPT_TX_CCID:
556                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
557         }
558
559         if (optlen < (int)sizeof(int))
560                 return -EINVAL;
561
562         if (get_user(val, (int __user *)optval))
563                 return -EFAULT;
564
565         if (optname == DCCP_SOCKOPT_SERVICE)
566                 return dccp_setsockopt_service(sk, val, optval, optlen);
567
568         lock_sock(sk);
569         switch (optname) {
570         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
571                 if (dp->dccps_role != DCCP_ROLE_SERVER)
572                         err = -EOPNOTSUPP;
573                 else
574                         dp->dccps_server_timewait = (val != 0);
575                 break;
576         case DCCP_SOCKOPT_SEND_CSCOV:
577                 err = dccp_setsockopt_cscov(sk, val, false);
578                 break;
579         case DCCP_SOCKOPT_RECV_CSCOV:
580                 err = dccp_setsockopt_cscov(sk, val, true);
581                 break;
582         default:
583                 err = -ENOPROTOOPT;
584                 break;
585         }
586         release_sock(sk);
587
588         return err;
589 }
590
591 int dccp_setsockopt(struct sock *sk, int level, int optname,
592                     char __user *optval, int optlen)
593 {
594         if (level != SOL_DCCP)
595                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
596                                                              optname, optval,
597                                                              optlen);
598         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
599 }
600
601 EXPORT_SYMBOL_GPL(dccp_setsockopt);
602
603 #ifdef CONFIG_COMPAT
604 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
605                            char __user *optval, int optlen)
606 {
607         if (level != SOL_DCCP)
608                 return inet_csk_compat_setsockopt(sk, level, optname,
609                                                   optval, optlen);
610         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
611 }
612
613 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
614 #endif
615
616 static int dccp_getsockopt_service(struct sock *sk, int len,
617                                    __be32 __user *optval,
618                                    int __user *optlen)
619 {
620         const struct dccp_sock *dp = dccp_sk(sk);
621         const struct dccp_service_list *sl;
622         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
623
624         lock_sock(sk);
625         if ((sl = dp->dccps_service_list) != NULL) {
626                 slen = sl->dccpsl_nr * sizeof(u32);
627                 total_len += slen;
628         }
629
630         err = -EINVAL;
631         if (total_len > len)
632                 goto out;
633
634         err = 0;
635         if (put_user(total_len, optlen) ||
636             put_user(dp->dccps_service, optval) ||
637             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
638                 err = -EFAULT;
639 out:
640         release_sock(sk);
641         return err;
642 }
643
644 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
645                     char __user *optval, int __user *optlen)
646 {
647         struct dccp_sock *dp;
648         int val, len;
649
650         if (get_user(len, optlen))
651                 return -EFAULT;
652
653         if (len < (int)sizeof(int))
654                 return -EINVAL;
655
656         dp = dccp_sk(sk);
657
658         switch (optname) {
659         case DCCP_SOCKOPT_PACKET_SIZE:
660                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
661                 return 0;
662         case DCCP_SOCKOPT_SERVICE:
663                 return dccp_getsockopt_service(sk, len,
664                                                (__be32 __user *)optval, optlen);
665         case DCCP_SOCKOPT_GET_CUR_MPS:
666                 val = dp->dccps_mss_cache;
667                 break;
668         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
669                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
670         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
671                 val = dp->dccps_server_timewait;
672                 break;
673         case DCCP_SOCKOPT_SEND_CSCOV:
674                 val = dp->dccps_pcslen;
675                 break;
676         case DCCP_SOCKOPT_RECV_CSCOV:
677                 val = dp->dccps_pcrlen;
678                 break;
679         case 128 ... 191:
680                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
681                                              len, (u32 __user *)optval, optlen);
682         case 192 ... 255:
683                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
684                                              len, (u32 __user *)optval, optlen);
685         default:
686                 return -ENOPROTOOPT;
687         }
688
689         len = sizeof(val);
690         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
691                 return -EFAULT;
692
693         return 0;
694 }
695
696 int dccp_getsockopt(struct sock *sk, int level, int optname,
697                     char __user *optval, int __user *optlen)
698 {
699         if (level != SOL_DCCP)
700                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
701                                                              optname, optval,
702                                                              optlen);
703         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
704 }
705
706 EXPORT_SYMBOL_GPL(dccp_getsockopt);
707
708 #ifdef CONFIG_COMPAT
709 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
710                            char __user *optval, int __user *optlen)
711 {
712         if (level != SOL_DCCP)
713                 return inet_csk_compat_getsockopt(sk, level, optname,
714                                                   optval, optlen);
715         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
716 }
717
718 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
719 #endif
720
721 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
722                  size_t len)
723 {
724         const struct dccp_sock *dp = dccp_sk(sk);
725         const int flags = msg->msg_flags;
726         const int noblock = flags & MSG_DONTWAIT;
727         struct sk_buff *skb;
728         int rc, size;
729         long timeo;
730
731         if (len > dp->dccps_mss_cache)
732                 return -EMSGSIZE;
733
734         lock_sock(sk);
735
736         if (sysctl_dccp_tx_qlen &&
737             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
738                 rc = -EAGAIN;
739                 goto out_release;
740         }
741
742         timeo = sock_sndtimeo(sk, noblock);
743
744         /*
745          * We have to use sk_stream_wait_connect here to set sk_write_pending,
746          * so that the trick in dccp_rcv_request_sent_state_process.
747          */
748         /* Wait for a connection to finish. */
749         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
750                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
751                         goto out_release;
752
753         size = sk->sk_prot->max_header + len;
754         release_sock(sk);
755         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
756         lock_sock(sk);
757         if (skb == NULL)
758                 goto out_release;
759
760         skb_reserve(skb, sk->sk_prot->max_header);
761         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
762         if (rc != 0)
763                 goto out_discard;
764
765         skb_queue_tail(&sk->sk_write_queue, skb);
766         dccp_write_xmit(sk,0);
767 out_release:
768         release_sock(sk);
769         return rc ? : len;
770 out_discard:
771         kfree_skb(skb);
772         goto out_release;
773 }
774
775 EXPORT_SYMBOL_GPL(dccp_sendmsg);
776
/*
 * dccp_recvmsg  -  receive one datagram
 * @iocb:     not used by this function
 * @sk:       socket to receive from
 * @msg:      destination message; MSG_TRUNC is set in msg_flags when the
 *            packet is longer than @len
 * @len:      size of the caller's buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to select the receive timeout
 * @flags:    receive flags; only MSG_PEEK is examined here
 * @addr_len: not used by this function
 *
 * Returns the number of bytes copied, 0 at end of connection, or a
 * negative errno.
 *
 * NOTE(review): `len' is a size_t that is also used to carry negative
 * errno values; they only become negative again through the conversion to
 * the int return type.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        /* A listening socket never has data to read. */
        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        /*
         * Each pass looks at the head of the receive queue. Every path
         * except the sk_wait_data()/continue one leaves the loop via break.
         */
        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* Peeking must not advance the close handshake. */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Any other packet type is dropped from the queue. */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                /* Nothing deliverable: decide whether to stop or to wait. */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* A zero timeout means the caller does not want to block. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Deliver at most one packet; flag truncation if it is longer. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* The packet is consumed unless the caller is only peeking. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
880
881 EXPORT_SYMBOL_GPL(dccp_recvmsg);
882
883 int inet_dccp_listen(struct socket *sock, int backlog)
884 {
885         struct sock *sk = sock->sk;
886         unsigned char old_state;
887         int err;
888
889         lock_sock(sk);
890
891         err = -EINVAL;
892         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
893                 goto out;
894
895         old_state = sk->sk_state;
896         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
897                 goto out;
898
899         /* Really, if the socket is already in listen state
900          * we can only allow the backlog to be adjusted.
901          */
902         if (old_state != DCCP_LISTEN) {
903                 /*
904                  * FIXME: here it probably should be sk->sk_prot->listen_start
905                  * see tcp_listen_start
906                  */
907                 err = dccp_listen_start(sk, backlog);
908                 if (err)
909                         goto out;
910         }
911         sk->sk_max_ack_backlog = backlog;
912         err = 0;
913
914 out:
915         release_sock(sk);
916         return err;
917 }
918
919 EXPORT_SYMBOL_GPL(inet_dccp_listen);
920
921 static void dccp_terminate_connection(struct sock *sk)
922 {
923         u8 next_state = DCCP_CLOSED;
924
925         switch (sk->sk_state) {
926         case DCCP_PASSIVE_CLOSE:
927         case DCCP_PASSIVE_CLOSEREQ:
928                 dccp_finish_passive_close(sk);
929                 break;
930         case DCCP_PARTOPEN:
931                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
932                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
933                 /* fall through */
934         case DCCP_OPEN:
935                 dccp_send_close(sk, 1);
936
937                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
938                     !dccp_sk(sk)->dccps_server_timewait)
939                         next_state = DCCP_ACTIVE_CLOSEREQ;
940                 else
941                         next_state = DCCP_CLOSING;
942                 /* fall through */
943         default:
944                 dccp_set_state(sk, next_state);
945         }
946 }
947
948 void dccp_close(struct sock *sk, long timeout)
949 {
950         struct dccp_sock *dp = dccp_sk(sk);
951         struct sk_buff *skb;
952         u32 data_was_unread = 0;
953         int state;
954
955         lock_sock(sk);
956
957         sk->sk_shutdown = SHUTDOWN_MASK;
958
959         if (sk->sk_state == DCCP_LISTEN) {
960                 dccp_set_state(sk, DCCP_CLOSED);
961
962                 /* Special case. */
963                 inet_csk_listen_stop(sk);
964
965                 goto adjudge_to_death;
966         }
967
968         sk_stop_timer(sk, &dp->dccps_xmit_timer);
969
970         /*
971          * We need to flush the recv. buffs.  We do this only on the
972          * descriptor close, not protocol-sourced closes, because the
973           *reader process may not have drained the data yet!
974          */
975         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
976                 data_was_unread += skb->len;
977                 __kfree_skb(skb);
978         }
979
980         if (data_was_unread) {
981                 /* Unread data was tossed, send an appropriate Reset Code */
982                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
983                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
984                 dccp_set_state(sk, DCCP_CLOSED);
985         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
986                 /* Check zero linger _after_ checking for unread data. */
987                 sk->sk_prot->disconnect(sk, 0);
988         } else if (sk->sk_state != DCCP_CLOSED) {
989                 dccp_terminate_connection(sk);
990         }
991
992         sk_stream_wait_close(sk, timeout);
993
994 adjudge_to_death:
995         state = sk->sk_state;
996         sock_hold(sk);
997         sock_orphan(sk);
998         atomic_inc(sk->sk_prot->orphan_count);
999
1000         /*
1001          * It is the last release_sock in its life. It will remove backlog.
1002          */
1003         release_sock(sk);
1004         /*
1005          * Now socket is owned by kernel and we acquire BH lock
1006          * to finish close. No need to check for user refs.
1007          */
1008         local_bh_disable();
1009         bh_lock_sock(sk);
1010         WARN_ON(sock_owned_by_user(sk));
1011
1012         /* Have we already been destroyed by a softirq or backlog? */
1013         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1014                 goto out;
1015
1016         if (sk->sk_state == DCCP_CLOSED)
1017                 inet_csk_destroy_sock(sk);
1018
1019         /* Otherwise, socket is reprieved until protocol close. */
1020
1021 out:
1022         bh_unlock_sock(sk);
1023         local_bh_enable();
1024         sock_put(sk);
1025 }
1026
1027 EXPORT_SYMBOL_GPL(dccp_close);
1028
/*
 * dccp_shutdown  -  shutdown hook for DCCP sockets
 * @sk:  socket the request was issued on (not used)
 * @how: shutdown mode requested by the caller
 *
 * Only emits a debug message showing @how; no socket state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1035
1036 static inline int dccp_mib_init(void)
1037 {
1038         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1039 }
1040
1041 static inline void dccp_mib_exit(void)
1042 {
1043         snmp_mib_free((void**)dccp_statistics);
1044 }
1045
1046 static int thash_entries;
1047 module_param(thash_entries, int, 0444);
1048 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1049
1050 #ifdef CONFIG_IP_DCCP_DEBUG
1051 int dccp_debug;
1052 module_param(dccp_debug, bool, 0644);
1053 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1054
1055 EXPORT_SYMBOL_GPL(dccp_debug);
1056 #endif
1057
1058 static int __init dccp_init(void)
1059 {
1060         unsigned long goal;
1061         int ehash_order, bhash_order, i;
1062         int rc = -ENOBUFS;
1063
1064         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1065                      FIELD_SIZEOF(struct sk_buff, cb));
1066
1067         dccp_hashinfo.bind_bucket_cachep =
1068                 kmem_cache_create("dccp_bind_bucket",
1069                                   sizeof(struct inet_bind_bucket), 0,
1070                                   SLAB_HWCACHE_ALIGN, NULL);
1071         if (!dccp_hashinfo.bind_bucket_cachep)
1072                 goto out;
1073
1074         /*
1075          * Size and allocate the main established and bind bucket
1076          * hash tables.
1077          *
1078          * The methodology is similar to that of the buffer cache.
1079          */
1080         if (num_physpages >= (128 * 1024))
1081                 goal = num_physpages >> (21 - PAGE_SHIFT);
1082         else
1083                 goal = num_physpages >> (23 - PAGE_SHIFT);
1084
1085         if (thash_entries)
1086                 goal = (thash_entries *
1087                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1088         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1089                 ;
1090         do {
1091                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1092                                         sizeof(struct inet_ehash_bucket);
1093                 while (dccp_hashinfo.ehash_size &
1094                        (dccp_hashinfo.ehash_size - 1))
1095                         dccp_hashinfo.ehash_size--;
1096                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1097                         __get_free_pages(GFP_ATOMIC, ehash_order);
1098         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1099
1100         if (!dccp_hashinfo.ehash) {
1101                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1102                 goto out_free_bind_bucket_cachep;
1103         }
1104
1105         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1106                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1107                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1108         }
1109
1110         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1111                         goto out_free_dccp_ehash;
1112
1113         bhash_order = ehash_order;
1114
1115         do {
1116                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1117                                         sizeof(struct inet_bind_hashbucket);
1118                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1119                     bhash_order > 0)
1120                         continue;
1121                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1122                         __get_free_pages(GFP_ATOMIC, bhash_order);
1123         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1124
1125         if (!dccp_hashinfo.bhash) {
1126                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1127                 goto out_free_dccp_locks;
1128         }
1129
1130         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1131                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1132                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1133         }
1134
1135         rc = dccp_mib_init();
1136         if (rc)
1137                 goto out_free_dccp_bhash;
1138
1139         rc = dccp_ackvec_init();
1140         if (rc)
1141                 goto out_free_dccp_mib;
1142
1143         rc = dccp_sysctl_init();
1144         if (rc)
1145                 goto out_ackvec_exit;
1146
1147         dccp_timestamping_init();
1148 out:
1149         return rc;
1150 out_ackvec_exit:
1151         dccp_ackvec_exit();
1152 out_free_dccp_mib:
1153         dccp_mib_exit();
1154 out_free_dccp_bhash:
1155         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1156         dccp_hashinfo.bhash = NULL;
1157 out_free_dccp_locks:
1158         inet_ehash_locks_free(&dccp_hashinfo);
1159 out_free_dccp_ehash:
1160         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1161         dccp_hashinfo.ehash = NULL;
1162 out_free_bind_bucket_cachep:
1163         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1164         dccp_hashinfo.bind_bucket_cachep = NULL;
1165         goto out;
1166 }
1167
1168 static void __exit dccp_fini(void)
1169 {
1170         dccp_mib_exit();
1171         free_pages((unsigned long)dccp_hashinfo.bhash,
1172                    get_order(dccp_hashinfo.bhash_size *
1173                              sizeof(struct inet_bind_hashbucket)));
1174         free_pages((unsigned long)dccp_hashinfo.ehash,
1175                    get_order(dccp_hashinfo.ehash_size *
1176                              sizeof(struct inet_ehash_bucket)));
1177         inet_ehash_locks_free(&dccp_hashinfo);
1178         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1179         dccp_ackvec_exit();
1180         dccp_sysctl_exit();
1181 }
1182
1183 module_init(dccp_init);
1184 module_exit(dccp_fini);
1185
1186 MODULE_LICENSE("GPL");
1187 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1188 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");