ecf3be961e11218894661af68996e4154e5a9854
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 /* Client retransmits all Confirm options until entering OPEN */
71                 if (oldstate == DCCP_PARTOPEN)
72                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
73                 break;
74
75         case DCCP_CLOSED:
76                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
77                     oldstate == DCCP_CLOSING)
78                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
79
80                 sk->sk_prot->unhash(sk);
81                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
82                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
83                         inet_put_port(sk);
84                 /* fall through */
85         default:
86                 if (oldstate == DCCP_OPEN)
87                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
88         }
89
90         /* Change state AFTER socket is unhashed to avoid closed
91          * socket sitting in hash tables.
92          */
93         sk->sk_state = state;
94 }
95
96 EXPORT_SYMBOL_GPL(dccp_set_state);
97
98 static void dccp_finish_passive_close(struct sock *sk)
99 {
100         switch (sk->sk_state) {
101         case DCCP_PASSIVE_CLOSE:
102                 /* Node (client or server) has received Close packet. */
103                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
104                 dccp_set_state(sk, DCCP_CLOSED);
105                 break;
106         case DCCP_PASSIVE_CLOSEREQ:
107                 /*
108                  * Client received CloseReq. We set the `active' flag so that
109                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
110                  */
111                 dccp_send_close(sk, 1);
112                 dccp_set_state(sk, DCCP_CLOSING);
113         }
114 }
115
116 void dccp_done(struct sock *sk)
117 {
118         dccp_set_state(sk, DCCP_CLOSED);
119         dccp_clear_xmit_timers(sk);
120
121         sk->sk_shutdown = SHUTDOWN_MASK;
122
123         if (!sock_flag(sk, SOCK_DEAD))
124                 sk->sk_state_change(sk);
125         else
126                 inet_csk_destroy_sock(sk);
127 }
128
129 EXPORT_SYMBOL_GPL(dccp_done);
130
131 const char *dccp_packet_name(const int type)
132 {
133         static const char *dccp_packet_names[] = {
134                 [DCCP_PKT_REQUEST]  = "REQUEST",
135                 [DCCP_PKT_RESPONSE] = "RESPONSE",
136                 [DCCP_PKT_DATA]     = "DATA",
137                 [DCCP_PKT_ACK]      = "ACK",
138                 [DCCP_PKT_DATAACK]  = "DATAACK",
139                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140                 [DCCP_PKT_CLOSE]    = "CLOSE",
141                 [DCCP_PKT_RESET]    = "RESET",
142                 [DCCP_PKT_SYNC]     = "SYNC",
143                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
144         };
145
146         if (type >= DCCP_NR_PKT_TYPES)
147                 return "INVALID";
148         else
149                 return dccp_packet_names[type];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
153
154 const char *dccp_state_name(const int state)
155 {
156         static char *dccp_state_names[] = {
157         [DCCP_OPEN]             = "OPEN",
158         [DCCP_REQUESTING]       = "REQUESTING",
159         [DCCP_PARTOPEN]         = "PARTOPEN",
160         [DCCP_LISTEN]           = "LISTEN",
161         [DCCP_RESPOND]          = "RESPOND",
162         [DCCP_CLOSING]          = "CLOSING",
163         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
164         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
165         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166         [DCCP_TIME_WAIT]        = "TIME_WAIT",
167         [DCCP_CLOSED]           = "CLOSED",
168         };
169
170         if (state >= DCCP_MAX_STATES)
171                 return "INVALID STATE!";
172         else
173                 return dccp_state_names[state];
174 }
175
176 EXPORT_SYMBOL_GPL(dccp_state_name);
177
178 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
179 {
180         struct dccp_sock *dp = dccp_sk(sk);
181         struct inet_connection_sock *icsk = inet_csk(sk);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = TCP_MIN_RCVMSS;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
193
194         dccp_init_xmit_timers(sk);
195
196         INIT_LIST_HEAD(&dp->dccps_featneg);
197         /* control socket doesn't need feat nego */
198         if (likely(ctl_sock_initialized))
199                 return dccp_feat_init(sk);
200         return 0;
201 }
202
203 EXPORT_SYMBOL_GPL(dccp_init_sock);
204
205 void dccp_destroy_sock(struct sock *sk)
206 {
207         struct dccp_sock *dp = dccp_sk(sk);
208
209         /*
210          * DCCP doesn't use sk_write_queue, just sk_send_head
211          * for retransmissions
212          */
213         if (sk->sk_send_head != NULL) {
214                 kfree_skb(sk->sk_send_head);
215                 sk->sk_send_head = NULL;
216         }
217
218         /* Clean up a referenced DCCP bind bucket. */
219         if (inet_csk(sk)->icsk_bind_hash != NULL)
220                 inet_put_port(sk);
221
222         kfree(dp->dccps_service_list);
223         dp->dccps_service_list = NULL;
224
225         if (dp->dccps_hc_rx_ackvec != NULL) {
226                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
227                 dp->dccps_hc_rx_ackvec = NULL;
228         }
229         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
230         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
231         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
232
233         /* clean up feature negotiation state */
234         dccp_feat_list_purge(&dp->dccps_featneg);
235 }
236
237 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
238
239 static inline int dccp_listen_start(struct sock *sk, int backlog)
240 {
241         struct dccp_sock *dp = dccp_sk(sk);
242
243         dp->dccps_role = DCCP_ROLE_LISTEN;
244         /* do not start to listen if feature negotiation setup fails */
245         if (dccp_feat_finalise_settings(dp))
246                 return -EPROTO;
247         return inet_csk_listen_start(sk, backlog);
248 }
249
250 static inline int dccp_need_reset(int state)
251 {
252         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253                state != DCCP_REQUESTING;
254 }
255
256 int dccp_disconnect(struct sock *sk, int flags)
257 {
258         struct inet_connection_sock *icsk = inet_csk(sk);
259         struct inet_sock *inet = inet_sk(sk);
260         int err = 0;
261         const int old_state = sk->sk_state;
262
263         if (old_state != DCCP_CLOSED)
264                 dccp_set_state(sk, DCCP_CLOSED);
265
266         /*
267          * This corresponds to the ABORT function of RFC793, sec. 3.8
268          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
269          */
270         if (old_state == DCCP_LISTEN) {
271                 inet_csk_listen_stop(sk);
272         } else if (dccp_need_reset(old_state)) {
273                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
274                 sk->sk_err = ECONNRESET;
275         } else if (old_state == DCCP_REQUESTING)
276                 sk->sk_err = ECONNRESET;
277
278         dccp_clear_xmit_timers(sk);
279
280         __skb_queue_purge(&sk->sk_receive_queue);
281         __skb_queue_purge(&sk->sk_write_queue);
282         if (sk->sk_send_head != NULL) {
283                 __kfree_skb(sk->sk_send_head);
284                 sk->sk_send_head = NULL;
285         }
286
287         inet->dport = 0;
288
289         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
290                 inet_reset_saddr(sk);
291
292         sk->sk_shutdown = 0;
293         sock_reset_flag(sk, SOCK_DONE);
294
295         icsk->icsk_backoff = 0;
296         inet_csk_delack_init(sk);
297         __sk_dst_reset(sk);
298
299         WARN_ON(inet->num && !icsk->icsk_bind_hash);
300
301         sk->sk_error_report(sk);
302         return err;
303 }
304
305 EXPORT_SYMBOL_GPL(dccp_disconnect);
306
307 /*
308  *      Wait for a DCCP event.
309  *
310  *      Note that we don't need to lock the socket, as the upper poll layers
311  *      take care of normal races (between the test and the event) and we don't
312  *      go look at any of the socket buffers directly.
313  */
314 unsigned int dccp_poll(struct file *file, struct socket *sock,
315                        poll_table *wait)
316 {
317         unsigned int mask;
318         struct sock *sk = sock->sk;
319
320         poll_wait(file, sk->sk_sleep, wait);
321         if (sk->sk_state == DCCP_LISTEN)
322                 return inet_csk_listen_poll(sk);
323
324         /* Socket is not locked. We are protected from async events
325            by poll logic and correct handling of state changes
326            made by another threads is impossible in any case.
327          */
328
329         mask = 0;
330         if (sk->sk_err)
331                 mask = POLLERR;
332
333         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
334                 mask |= POLLHUP;
335         if (sk->sk_shutdown & RCV_SHUTDOWN)
336                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
337
338         /* Connected? */
339         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
340                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
341                         mask |= POLLIN | POLLRDNORM;
342
343                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
344                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
345                                 mask |= POLLOUT | POLLWRNORM;
346                         } else {  /* send SIGIO later */
347                                 set_bit(SOCK_ASYNC_NOSPACE,
348                                         &sk->sk_socket->flags);
349                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
350
351                                 /* Race breaker. If space is freed after
352                                  * wspace test but before the flags are set,
353                                  * IO signal will be lost.
354                                  */
355                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
356                                         mask |= POLLOUT | POLLWRNORM;
357                         }
358                 }
359         }
360         return mask;
361 }
362
363 EXPORT_SYMBOL_GPL(dccp_poll);
364
365 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
366 {
367         int rc = -ENOTCONN;
368
369         lock_sock(sk);
370
371         if (sk->sk_state == DCCP_LISTEN)
372                 goto out;
373
374         switch (cmd) {
375         case SIOCINQ: {
376                 struct sk_buff *skb;
377                 unsigned long amount = 0;
378
379                 skb = skb_peek(&sk->sk_receive_queue);
380                 if (skb != NULL) {
381                         /*
382                          * We will only return the amount of this packet since
383                          * that is all that will be read.
384                          */
385                         amount = skb->len;
386                 }
387                 rc = put_user(amount, (int __user *)arg);
388         }
389                 break;
390         default:
391                 rc = -ENOIOCTLCMD;
392                 break;
393         }
394 out:
395         release_sock(sk);
396         return rc;
397 }
398
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
400
401 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
402                                    char __user *optval, int optlen)
403 {
404         struct dccp_sock *dp = dccp_sk(sk);
405         struct dccp_service_list *sl = NULL;
406
407         if (service == DCCP_SERVICE_INVALID_VALUE ||
408             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
409                 return -EINVAL;
410
411         if (optlen > sizeof(service)) {
412                 sl = kmalloc(optlen, GFP_KERNEL);
413                 if (sl == NULL)
414                         return -ENOMEM;
415
416                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
417                 if (copy_from_user(sl->dccpsl_list,
418                                    optval + sizeof(service),
419                                    optlen - sizeof(service)) ||
420                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
421                         kfree(sl);
422                         return -EFAULT;
423                 }
424         }
425
426         lock_sock(sk);
427         dp->dccps_service = service;
428
429         kfree(dp->dccps_service_list);
430
431         dp->dccps_service_list = sl;
432         release_sock(sk);
433         return 0;
434 }
435
436 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
437 {
438         u8 *list, len;
439         int i, rc;
440
441         if (cscov < 0 || cscov > 15)
442                 return -EINVAL;
443         /*
444          * Populate a list of permissible values, in the range cscov...15. This
445          * is necessary since feature negotiation of single values only works if
446          * both sides incidentally choose the same value. Since the list starts
447          * lowest-value first, negotiation will pick the smallest shared value.
448          */
449         if (cscov == 0)
450                 return 0;
451         len = 16 - cscov;
452
453         list = kmalloc(len, GFP_KERNEL);
454         if (list == NULL)
455                 return -ENOBUFS;
456
457         for (i = 0; i < len; i++)
458                 list[i] = cscov++;
459
460         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
461
462         if (rc == 0) {
463                 if (rx)
464                         dccp_sk(sk)->dccps_pcrlen = cscov;
465                 else
466                         dccp_sk(sk)->dccps_pcslen = cscov;
467         }
468         kfree(list);
469         return rc;
470 }
471
472 static int dccp_setsockopt_ccid(struct sock *sk, int type,
473                                 char __user *optval, int optlen)
474 {
475         u8 *val;
476         int rc = 0;
477
478         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
479                 return -EINVAL;
480
481         val = kmalloc(optlen, GFP_KERNEL);
482         if (val == NULL)
483                 return -ENOMEM;
484
485         if (copy_from_user(val, optval, optlen)) {
486                 kfree(val);
487                 return -EFAULT;
488         }
489
490         lock_sock(sk);
491         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
492                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
493
494         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
495                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
496         release_sock(sk);
497
498         kfree(val);
499         return rc;
500 }
501
502 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
503                 char __user *optval, int optlen)
504 {
505         struct dccp_sock *dp = dccp_sk(sk);
506         int val, err = 0;
507
508         switch (optname) {
509         case DCCP_SOCKOPT_PACKET_SIZE:
510                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
511                 return 0;
512         case DCCP_SOCKOPT_CHANGE_L:
513         case DCCP_SOCKOPT_CHANGE_R:
514                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
515                 return 0;
516         case DCCP_SOCKOPT_CCID:
517         case DCCP_SOCKOPT_RX_CCID:
518         case DCCP_SOCKOPT_TX_CCID:
519                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
520         }
521
522         if (optlen < (int)sizeof(int))
523                 return -EINVAL;
524
525         if (get_user(val, (int __user *)optval))
526                 return -EFAULT;
527
528         if (optname == DCCP_SOCKOPT_SERVICE)
529                 return dccp_setsockopt_service(sk, val, optval, optlen);
530
531         lock_sock(sk);
532         switch (optname) {
533         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
534                 if (dp->dccps_role != DCCP_ROLE_SERVER)
535                         err = -EOPNOTSUPP;
536                 else
537                         dp->dccps_server_timewait = (val != 0);
538                 break;
539         case DCCP_SOCKOPT_SEND_CSCOV:
540                 err = dccp_setsockopt_cscov(sk, val, false);
541                 break;
542         case DCCP_SOCKOPT_RECV_CSCOV:
543                 err = dccp_setsockopt_cscov(sk, val, true);
544                 break;
545         case DCCP_SOCKOPT_QPOLICY_ID:
546                 if (sk->sk_state != DCCP_CLOSED)
547                         err = -EISCONN;
548                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
549                         err = -EINVAL;
550                 else
551                         dp->dccps_qpolicy = val;
552                 break;
553         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
554                 if (val < 0)
555                         err = -EINVAL;
556                 else
557                         dp->dccps_tx_qlen = val;
558                 break;
559         default:
560                 err = -ENOPROTOOPT;
561                 break;
562         }
563         release_sock(sk);
564
565         return err;
566 }
567
568 int dccp_setsockopt(struct sock *sk, int level, int optname,
569                     char __user *optval, int optlen)
570 {
571         if (level != SOL_DCCP)
572                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
573                                                              optname, optval,
574                                                              optlen);
575         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
576 }
577
578 EXPORT_SYMBOL_GPL(dccp_setsockopt);
579
#ifdef CONFIG_COMPAT
/*
 * Compat variant of dccp_setsockopt(): levels other than SOL_DCCP go through
 * inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
592
593 static int dccp_getsockopt_service(struct sock *sk, int len,
594                                    __be32 __user *optval,
595                                    int __user *optlen)
596 {
597         const struct dccp_sock *dp = dccp_sk(sk);
598         const struct dccp_service_list *sl;
599         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
600
601         lock_sock(sk);
602         if ((sl = dp->dccps_service_list) != NULL) {
603                 slen = sl->dccpsl_nr * sizeof(u32);
604                 total_len += slen;
605         }
606
607         err = -EINVAL;
608         if (total_len > len)
609                 goto out;
610
611         err = 0;
612         if (put_user(total_len, optlen) ||
613             put_user(dp->dccps_service, optval) ||
614             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
615                 err = -EFAULT;
616 out:
617         release_sock(sk);
618         return err;
619 }
620
621 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
622                     char __user *optval, int __user *optlen)
623 {
624         struct dccp_sock *dp;
625         int val, len;
626
627         if (get_user(len, optlen))
628                 return -EFAULT;
629
630         if (len < (int)sizeof(int))
631                 return -EINVAL;
632
633         dp = dccp_sk(sk);
634
635         switch (optname) {
636         case DCCP_SOCKOPT_PACKET_SIZE:
637                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
638                 return 0;
639         case DCCP_SOCKOPT_SERVICE:
640                 return dccp_getsockopt_service(sk, len,
641                                                (__be32 __user *)optval, optlen);
642         case DCCP_SOCKOPT_GET_CUR_MPS:
643                 val = dp->dccps_mss_cache;
644                 break;
645         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
646                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
647         case DCCP_SOCKOPT_TX_CCID:
648                 val = ccid_get_current_tx_ccid(dp);
649                 if (val < 0)
650                         return -ENOPROTOOPT;
651                 break;
652         case DCCP_SOCKOPT_RX_CCID:
653                 val = ccid_get_current_rx_ccid(dp);
654                 if (val < 0)
655                         return -ENOPROTOOPT;
656                 break;
657         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
658                 val = dp->dccps_server_timewait;
659                 break;
660         case DCCP_SOCKOPT_SEND_CSCOV:
661                 val = dp->dccps_pcslen;
662                 break;
663         case DCCP_SOCKOPT_RECV_CSCOV:
664                 val = dp->dccps_pcrlen;
665                 break;
666         case DCCP_SOCKOPT_QPOLICY_ID:
667                 val = dp->dccps_qpolicy;
668                 break;
669         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
670                 val = dp->dccps_tx_qlen;
671                 break;
672         case 128 ... 191:
673                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
674                                              len, (u32 __user *)optval, optlen);
675         case 192 ... 255:
676                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
677                                              len, (u32 __user *)optval, optlen);
678         default:
679                 return -ENOPROTOOPT;
680         }
681
682         len = sizeof(val);
683         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
684                 return -EFAULT;
685
686         return 0;
687 }
688
689 int dccp_getsockopt(struct sock *sk, int level, int optname,
690                     char __user *optval, int __user *optlen)
691 {
692         if (level != SOL_DCCP)
693                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
694                                                              optname, optval,
695                                                              optlen);
696         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
697 }
698
699 EXPORT_SYMBOL_GPL(dccp_getsockopt);
700
#ifdef CONFIG_COMPAT
/*
 * Compat variant of dccp_getsockopt(): levels other than SOL_DCCP go through
 * inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
713
714 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
715 {
716         struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
717
718         /*
719          * Assign an (opaque) qpolicy priority value to skb->priority.
720          *
721          * We are overloading this skb field for use with the qpolicy subystem.
722          * The skb->priority is normally used for the SO_PRIORITY option, which
723          * is initialised from sk_priority. Since the assignment of sk_priority
724          * to skb->priority happens later (on layer 3), we overload this field
725          * for use with queueing priorities as long as the skb is on layer 4.
726          * The default priority value (if nothing is set) is 0.
727          */
728         skb->priority = 0;
729
730         for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
731
732                 if (!CMSG_OK(msg, cmsg))
733                         return -EINVAL;
734
735                 if (cmsg->cmsg_level != SOL_DCCP)
736                         continue;
737
738                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
739                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
740                         return -EINVAL;
741
742                 switch (cmsg->cmsg_type) {
743                 case DCCP_SCM_PRIORITY:
744                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
745                                 return -EINVAL;
746                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
747                         break;
748                 default:
749                         return -EINVAL;
750                 }
751         }
752         return 0;
753 }
754
755 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
756                  size_t len)
757 {
758         const struct dccp_sock *dp = dccp_sk(sk);
759         const int flags = msg->msg_flags;
760         const int noblock = flags & MSG_DONTWAIT;
761         struct sk_buff *skb;
762         int rc, size;
763         long timeo;
764
765         if (len > dp->dccps_mss_cache)
766                 return -EMSGSIZE;
767
768         lock_sock(sk);
769
770         if (dccp_qpolicy_full(sk)) {
771                 rc = -EAGAIN;
772                 goto out_release;
773         }
774
775         timeo = sock_sndtimeo(sk, noblock);
776
777         /*
778          * We have to use sk_stream_wait_connect here to set sk_write_pending,
779          * so that the trick in dccp_rcv_request_sent_state_process.
780          */
781         /* Wait for a connection to finish. */
782         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
783                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
784                         goto out_release;
785
786         size = sk->sk_prot->max_header + len;
787         release_sock(sk);
788         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
789         lock_sock(sk);
790         if (skb == NULL)
791                 goto out_release;
792
793         skb_reserve(skb, sk->sk_prot->max_header);
794         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
795         if (rc != 0)
796                 goto out_discard;
797
798         rc = dccp_msghdr_parse(msg, skb);
799         if (rc != 0)
800                 goto out_discard;
801
802         dccp_qpolicy_push(sk, skb);
803         dccp_write_xmit(sk);
804 out_release:
805         release_sock(sk);
806         return rc ? : len;
807 out_discard:
808         kfree_skb(skb);
809         goto out_release;
810 }
811
812 EXPORT_SYMBOL_GPL(dccp_sendmsg);
813
/*
 * dccp_recvmsg  -  receive at most one datagram from a DCCP socket
 * @iocb:     not used in this function
 * @sk:       receiving socket
 * @msg:      destination; MSG_TRUNC is set in msg_flags on truncation
 * @len:      size of the user buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to compute the wait timeout
 * @flags:    only MSG_PEEK is evaluated here
 * @addr_len: not used in this function
 *
 * Returns the number of bytes copied, 0 when a Close/CloseReq/Reset packet
 * heads the queue or the receive side is finished, or a negative error.
 * Note that `len' is a size_t: negative error codes are stored in it and
 * converted back to int by the return statement.
 */
814 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
815                  size_t len, int nonblock, int flags, int *addr_len)
816 {
817         const struct dccp_hdr *dh;
818         long timeo;
819
820         lock_sock(sk);
821
            /* Listening sockets carry no data */
822         if (sk->sk_state == DCCP_LISTEN) {
823                 len = -ENOTCONN;
824                 goto out;
825         }
826
827         timeo = sock_rcvtimeo(sk, nonblock);
828
            /*
             * Each iteration looks at the head of the receive queue. The
             * labels found_ok_skb and found_fin_ok sit inside the loop body
             * because they use the loop-local `skb'.
             */
829         do {
830                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
831
832                 if (skb == NULL)
833                         goto verify_sock_status;
834
835                 dh = dccp_hdr(skb);
836
837                 switch (dh->dccph_type) {
838                 case DCCP_PKT_DATA:
839                 case DCCP_PKT_DATAACK:
840                         goto found_ok_skb;
841
842                 case DCCP_PKT_CLOSE:
843                 case DCCP_PKT_CLOSEREQ:
                            /* a peeking reader must not advance the close handshake */
844                         if (!(flags & MSG_PEEK))
845                                 dccp_finish_passive_close(sk);
846                         /* fall through */
847                 case DCCP_PKT_RESET:
848                         dccp_pr_debug("found fin (%s) ok!\n",
849                                       dccp_packet_name(dh->dccph_type));
850                         len = 0;
851                         goto found_fin_ok;
852                 default:
                            /* any other packet type is dropped from the queue */
853                         dccp_pr_debug("packet_type=%s\n",
854                                       dccp_packet_name(dh->dccph_type));
855                         sk_eat_skb(sk, skb, 0);
856                 }
                    /*
                     * Nothing deliverable at the head of the queue: find out
                     * whether waiting for more data makes sense at all.
                     */
857 verify_sock_status:
858                 if (sock_flag(sk, SOCK_DONE)) {
859                         len = 0;
860                         break;
861                 }
862
863                 if (sk->sk_err) {
864                         len = sock_error(sk);
865                         break;
866                 }
867
868                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
869                         len = 0;
870                         break;
871                 }
872
873                 if (sk->sk_state == DCCP_CLOSED) {
                            /*
                             * NOTE(review): SOCK_DONE was already tested above
                             * and left the loop, so this test looks always
                             * true here and the `len = 0' path below dead -
                             * confirm before relying on it.
                             */
874                         if (!sock_flag(sk, SOCK_DONE)) {
875                                 /* This occurs when user tries to read
876                                  * from never connected socket.
877                                  */
878                                 len = -ENOTCONN;
879                                 break;
880                         }
881                         len = 0;
882                         break;
883                 }
884
                    /* a zero timeout means: do not wait */
885                 if (!timeo) {
886                         len = -EAGAIN;
887                         break;
888                 }
889
890                 if (signal_pending(current)) {
891                         len = sock_intr_errno(timeo);
892                         break;
893                 }
894
895                 sk_wait_data(sk, &timeo);
896                 continue;
897         found_ok_skb:
                    /*
                     * Datagram semantics: at most one packet per call. A
                     * buffer smaller than the packet gets the first `len'
                     * bytes and MSG_TRUNC is flagged.
                     */
898                 if (len > skb->len)
899                         len = skb->len;
900                 else if (len < skb->len)
901                         msg->msg_flags |= MSG_TRUNC;
902
903                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
904                         /* Exception. Bailout! */
905                         len = -EFAULT;
906                         break;
907                 }
908         found_fin_ok:
                    /* the packet stays queued when the caller only peeks */
909                 if (!(flags & MSG_PEEK))
910                         sk_eat_skb(sk, skb, 0);
911                 break;
912         } while (1);
913 out:
914         release_sock(sk);
915         return len;
916 }
917
918 EXPORT_SYMBOL_GPL(dccp_recvmsg);
919
920 int inet_dccp_listen(struct socket *sock, int backlog)
921 {
922         struct sock *sk = sock->sk;
923         unsigned char old_state;
924         int err;
925
926         lock_sock(sk);
927
928         err = -EINVAL;
929         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
930                 goto out;
931
932         old_state = sk->sk_state;
933         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
934                 goto out;
935
936         /* Really, if the socket is already in listen state
937          * we can only allow the backlog to be adjusted.
938          */
939         if (old_state != DCCP_LISTEN) {
940                 /*
941                  * FIXME: here it probably should be sk->sk_prot->listen_start
942                  * see tcp_listen_start
943                  */
944                 err = dccp_listen_start(sk, backlog);
945                 if (err)
946                         goto out;
947         }
948         sk->sk_max_ack_backlog = backlog;
949         err = 0;
950
951 out:
952         release_sock(sk);
953         return err;
954 }
955
956 EXPORT_SYMBOL_GPL(inet_dccp_listen);
957
958 static void dccp_terminate_connection(struct sock *sk)
959 {
960         u8 next_state = DCCP_CLOSED;
961
962         switch (sk->sk_state) {
963         case DCCP_PASSIVE_CLOSE:
964         case DCCP_PASSIVE_CLOSEREQ:
965                 dccp_finish_passive_close(sk);
966                 break;
967         case DCCP_PARTOPEN:
968                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
969                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
970                 /* fall through */
971         case DCCP_OPEN:
972                 dccp_send_close(sk, 1);
973
974                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
975                     !dccp_sk(sk)->dccps_server_timewait)
976                         next_state = DCCP_ACTIVE_CLOSEREQ;
977                 else
978                         next_state = DCCP_CLOSING;
979                 /* fall through */
980         default:
981                 dccp_set_state(sk, next_state);
982         }
983 }
984
985 void dccp_close(struct sock *sk, long timeout)
986 {
987         struct dccp_sock *dp = dccp_sk(sk);
988         struct sk_buff *skb;
989         u32 data_was_unread = 0;
990         int state;
991
992         lock_sock(sk);
993
994         sk->sk_shutdown = SHUTDOWN_MASK;
995
996         if (sk->sk_state == DCCP_LISTEN) {
997                 dccp_set_state(sk, DCCP_CLOSED);
998
999                 /* Special case. */
1000                 inet_csk_listen_stop(sk);
1001
1002                 goto adjudge_to_death;
1003         }
1004
1005         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1006
1007         /*
1008          * We need to flush the recv. buffs.  We do this only on the
1009          * descriptor close, not protocol-sourced closes, because the
1010           *reader process may not have drained the data yet!
1011          */
1012         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1013                 data_was_unread += skb->len;
1014                 __kfree_skb(skb);
1015         }
1016
1017         if (data_was_unread) {
1018                 /* Unread data was tossed, send an appropriate Reset Code */
1019                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
1020                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1021                 dccp_set_state(sk, DCCP_CLOSED);
1022         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1023                 /* Check zero linger _after_ checking for unread data. */
1024                 sk->sk_prot->disconnect(sk, 0);
1025         } else if (sk->sk_state != DCCP_CLOSED) {
1026                 /*
1027                  * Normal connection termination. May need to wait if there are
1028                  * still packets in the TX queue that are delayed by the CCID.
1029                  */
1030                 dccp_flush_write_queue(sk, &timeout);
1031                 dccp_terminate_connection(sk);
1032         }
1033
1034         /*
1035          * Flush write queue. This may be necessary in several cases:
1036          * - we have been closed by the peer but still have application data;
1037          * - abortive termination (unread data or zero linger time),
1038          * - normal termination but queue could not be flushed within time limit
1039          */
1040         __skb_queue_purge(&sk->sk_write_queue);
1041
1042         sk_stream_wait_close(sk, timeout);
1043
1044 adjudge_to_death:
1045         state = sk->sk_state;
1046         sock_hold(sk);
1047         sock_orphan(sk);
1048         atomic_inc(sk->sk_prot->orphan_count);
1049
1050         /*
1051          * It is the last release_sock in its life. It will remove backlog.
1052          */
1053         release_sock(sk);
1054         /*
1055          * Now socket is owned by kernel and we acquire BH lock
1056          * to finish close. No need to check for user refs.
1057          */
1058         local_bh_disable();
1059         bh_lock_sock(sk);
1060         WARN_ON(sock_owned_by_user(sk));
1061
1062         /* Have we already been destroyed by a softirq or backlog? */
1063         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1064                 goto out;
1065
1066         if (sk->sk_state == DCCP_CLOSED)
1067                 inet_csk_destroy_sock(sk);
1068
1069         /* Otherwise, socket is reprieved until protocol close. */
1070
1071 out:
1072         bh_unlock_sock(sk);
1073         local_bh_enable();
1074         sock_put(sk);
1075 }
1076
1077 EXPORT_SYMBOL_GPL(dccp_close);
1078
/*
 * shutdown() handler for DCCP sockets.
 *
 * Currently this only emits a debug trace showing the requested mode
 * (@how, printed in hex); @sk itself is not modified here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1083
1084 EXPORT_SYMBOL_GPL(dccp_shutdown);
1085
1086 static inline int dccp_mib_init(void)
1087 {
1088         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1089 }
1090
1091 static inline void dccp_mib_exit(void)
1092 {
1093         snmp_mib_free((void**)dccp_statistics);
1094 }
1095
/*
 * Optional load-time override for the size of the established-connections
 * hash table: when non-zero, dccp_init() computes its allocation goal from
 * thash_entries * sizeof(struct inet_ehash_bucket) instead of deriving it
 * from num_physpages. Registered with permission bits 0444.
 */
1096 static int thash_entries;
1097 module_param(thash_entries, int, 0444);
1098 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1099
/*
 * Debug-message switch; it only exists when CONFIG_IP_DCCP_DEBUG is set.
 * Registered as a bool parameter with permission bits 0644 and exported
 * (GPL-only) so that other modules can reference it.
 * NOTE(review): presumably this is what gates dccp_pr_debug() output -
 * confirm against the macro definition in dccp.h.
 */
1100 #ifdef CONFIG_IP_DCCP_DEBUG
1101 int dccp_debug;
1102 module_param(dccp_debug, bool, 0644);
1103 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1104
1105 EXPORT_SYMBOL_GPL(dccp_debug);
1106 #endif
1107
1108 static int __init dccp_init(void)
1109 {
1110         unsigned long goal;
1111         int ehash_order, bhash_order, i;
1112         int rc = -ENOBUFS;
1113
1114         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1115                      FIELD_SIZEOF(struct sk_buff, cb));
1116
1117         dccp_hashinfo.bind_bucket_cachep =
1118                 kmem_cache_create("dccp_bind_bucket",
1119                                   sizeof(struct inet_bind_bucket), 0,
1120                                   SLAB_HWCACHE_ALIGN, NULL);
1121         if (!dccp_hashinfo.bind_bucket_cachep)
1122                 goto out;
1123
1124         /*
1125          * Size and allocate the main established and bind bucket
1126          * hash tables.
1127          *
1128          * The methodology is similar to that of the buffer cache.
1129          */
1130         if (num_physpages >= (128 * 1024))
1131                 goal = num_physpages >> (21 - PAGE_SHIFT);
1132         else
1133                 goal = num_physpages >> (23 - PAGE_SHIFT);
1134
1135         if (thash_entries)
1136                 goal = (thash_entries *
1137                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1138         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1139                 ;
1140         do {
1141                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1142                                         sizeof(struct inet_ehash_bucket);
1143                 while (dccp_hashinfo.ehash_size &
1144                        (dccp_hashinfo.ehash_size - 1))
1145                         dccp_hashinfo.ehash_size--;
1146                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1147                         __get_free_pages(GFP_ATOMIC, ehash_order);
1148         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1149
1150         if (!dccp_hashinfo.ehash) {
1151                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1152                 goto out_free_bind_bucket_cachep;
1153         }
1154
1155         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1156                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1157                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1158         }
1159
1160         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1161                         goto out_free_dccp_ehash;
1162
1163         bhash_order = ehash_order;
1164
1165         do {
1166                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1167                                         sizeof(struct inet_bind_hashbucket);
1168                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1169                     bhash_order > 0)
1170                         continue;
1171                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1172                         __get_free_pages(GFP_ATOMIC, bhash_order);
1173         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1174
1175         if (!dccp_hashinfo.bhash) {
1176                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1177                 goto out_free_dccp_locks;
1178         }
1179
1180         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1181                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1182                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1183         }
1184
1185         rc = dccp_mib_init();
1186         if (rc)
1187                 goto out_free_dccp_bhash;
1188
1189         rc = dccp_ackvec_init();
1190         if (rc)
1191                 goto out_free_dccp_mib;
1192
1193         rc = dccp_sysctl_init();
1194         if (rc)
1195                 goto out_ackvec_exit;
1196
1197         dccp_timestamping_init();
1198 out:
1199         return rc;
1200 out_ackvec_exit:
1201         dccp_ackvec_exit();
1202 out_free_dccp_mib:
1203         dccp_mib_exit();
1204 out_free_dccp_bhash:
1205         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1206         dccp_hashinfo.bhash = NULL;
1207 out_free_dccp_locks:
1208         inet_ehash_locks_free(&dccp_hashinfo);
1209 out_free_dccp_ehash:
1210         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1211         dccp_hashinfo.ehash = NULL;
1212 out_free_bind_bucket_cachep:
1213         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1214         dccp_hashinfo.bind_bucket_cachep = NULL;
1215         goto out;
1216 }
1217
1218 static void __exit dccp_fini(void)
1219 {
1220         dccp_mib_exit();
1221         free_pages((unsigned long)dccp_hashinfo.bhash,
1222                    get_order(dccp_hashinfo.bhash_size *
1223                              sizeof(struct inet_bind_hashbucket)));
1224         free_pages((unsigned long)dccp_hashinfo.ehash,
1225                    get_order(dccp_hashinfo.ehash_size *
1226                              sizeof(struct inet_ehash_bucket)));
1227         inet_ehash_locks_free(&dccp_hashinfo);
1228         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1229         dccp_ackvec_exit();
1230         dccp_sysctl_exit();
1231 }
1232
1233 module_init(dccp_init);
1234 module_exit(dccp_fini);
1235
1236 MODULE_LICENSE("GPL");
1237 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1238 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");