[DCCP] minisock: Rename struct dccp_options to struct dccp_minisock
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/sock.h>
28 #include <net/xfrm.h>
29
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41
42 EXPORT_SYMBOL_GPL(dccp_statistics);
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49         .lhash_lock     = RW_LOCK_UNLOCKED,
50         .lhash_users    = ATOMIC_INIT(0),
51         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52 };
53
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56 void dccp_set_state(struct sock *sk, const int state)
57 {
58         const int oldstate = sk->sk_state;
59
60         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
61                       dccp_role(sk), sk,
62                       dccp_state_name(oldstate), dccp_state_name(state));
63         WARN_ON(state == oldstate);
64
65         switch (state) {
66         case DCCP_OPEN:
67                 if (oldstate != DCCP_OPEN)
68                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
69                 break;
70
71         case DCCP_CLOSED:
72                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
73                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
74
75                 sk->sk_prot->unhash(sk);
76                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
77                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
78                         inet_put_port(&dccp_hashinfo, sk);
79                 /* fall through */
80         default:
81                 if (oldstate == DCCP_OPEN)
82                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
83         }
84
85         /* Change state AFTER socket is unhashed to avoid closed
86          * socket sitting in hash tables.
87          */
88         sk->sk_state = state;
89 }
90
91 EXPORT_SYMBOL_GPL(dccp_set_state);
92
93 void dccp_done(struct sock *sk)
94 {
95         dccp_set_state(sk, DCCP_CLOSED);
96         dccp_clear_xmit_timers(sk);
97
98         sk->sk_shutdown = SHUTDOWN_MASK;
99
100         if (!sock_flag(sk, SOCK_DEAD))
101                 sk->sk_state_change(sk);
102         else
103                 inet_csk_destroy_sock(sk);
104 }
105
106 EXPORT_SYMBOL_GPL(dccp_done);
107
108 const char *dccp_packet_name(const int type)
109 {
110         static const char *dccp_packet_names[] = {
111                 [DCCP_PKT_REQUEST]  = "REQUEST",
112                 [DCCP_PKT_RESPONSE] = "RESPONSE",
113                 [DCCP_PKT_DATA]     = "DATA",
114                 [DCCP_PKT_ACK]      = "ACK",
115                 [DCCP_PKT_DATAACK]  = "DATAACK",
116                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
117                 [DCCP_PKT_CLOSE]    = "CLOSE",
118                 [DCCP_PKT_RESET]    = "RESET",
119                 [DCCP_PKT_SYNC]     = "SYNC",
120                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
121         };
122
123         if (type >= DCCP_NR_PKT_TYPES)
124                 return "INVALID";
125         else
126                 return dccp_packet_names[type];
127 }
128
129 EXPORT_SYMBOL_GPL(dccp_packet_name);
130
131 const char *dccp_state_name(const int state)
132 {
133         static char *dccp_state_names[] = {
134         [DCCP_OPEN]       = "OPEN",
135         [DCCP_REQUESTING] = "REQUESTING",
136         [DCCP_PARTOPEN]   = "PARTOPEN",
137         [DCCP_LISTEN]     = "LISTEN",
138         [DCCP_RESPOND]    = "RESPOND",
139         [DCCP_CLOSING]    = "CLOSING",
140         [DCCP_TIME_WAIT]  = "TIME_WAIT",
141         [DCCP_CLOSED]     = "CLOSED",
142         };
143
144         if (state >= DCCP_MAX_STATES)
145                 return "INVALID STATE!";
146         else
147                 return dccp_state_names[state];
148 }
149
150 EXPORT_SYMBOL_GPL(dccp_state_name);
151
152 void dccp_hash(struct sock *sk)
153 {
154         inet_hash(&dccp_hashinfo, sk);
155 }
156
157 EXPORT_SYMBOL_GPL(dccp_hash);
158
159 void dccp_unhash(struct sock *sk)
160 {
161         inet_unhash(&dccp_hashinfo, sk);
162 }
163
164 EXPORT_SYMBOL_GPL(dccp_unhash);
165
166 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
167 {
168         struct dccp_sock *dp = dccp_sk(sk);
169         struct dccp_minisock *dmsk = dccp_msk(sk);
170         struct inet_connection_sock *icsk = inet_csk(sk);
171
172         dccp_minisock_init(&dp->dccps_minisock);
173         do_gettimeofday(&dp->dccps_epoch);
174
175         /*
176          * FIXME: We're hardcoding the CCID, and doing this at this point makes
177          * the listening (master) sock get CCID control blocks, which is not
178          * necessary, but for now, to not mess with the test userspace apps,
179          * let's leave it here; later, the real solution is to do this in a
180          * setsockopt(CCIDs-I-want/accept). -acme
181          */
182         if (likely(ctl_sock_initialized)) {
183                 int rc = dccp_feat_init(sk);
184
185                 if (rc)
186                         return rc;
187
188                 if (dmsk->dccpms_send_ack_vector) {
189                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
190                         if (dp->dccps_hc_rx_ackvec == NULL)
191                                 return -ENOMEM;
192                 }
193                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
194                                                       sk, GFP_KERNEL);
195                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
196                                                       sk, GFP_KERNEL);
197                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
198                              dp->dccps_hc_tx_ccid == NULL)) {
199                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
200                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
201                         if (dmsk->dccpms_send_ack_vector) {
202                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
203                                 dp->dccps_hc_rx_ackvec = NULL;
204                         }
205                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
206                         return -ENOMEM;
207                 }
208         } else {
209                 /* control socket doesn't need feat nego */
210                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
211                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
212         }
213
214         dccp_init_xmit_timers(sk);
215         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
216         sk->sk_state            = DCCP_CLOSED;
217         sk->sk_write_space      = dccp_write_space;
218         icsk->icsk_sync_mss     = dccp_sync_mss;
219         dp->dccps_mss_cache     = 536;
220         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
221         dp->dccps_service       = DCCP_SERVICE_INVALID_VALUE;
222         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
223
224         return 0;
225 }
226
227 EXPORT_SYMBOL_GPL(dccp_init_sock);
228
229 int dccp_destroy_sock(struct sock *sk)
230 {
231         struct dccp_sock *dp = dccp_sk(sk);
232
233         /*
234          * DCCP doesn't use sk_write_queue, just sk_send_head
235          * for retransmissions
236          */
237         if (sk->sk_send_head != NULL) {
238                 kfree_skb(sk->sk_send_head);
239                 sk->sk_send_head = NULL;
240         }
241
242         /* Clean up a referenced DCCP bind bucket. */
243         if (inet_csk(sk)->icsk_bind_hash != NULL)
244                 inet_put_port(&dccp_hashinfo, sk);
245
246         kfree(dp->dccps_service_list);
247         dp->dccps_service_list = NULL;
248
249         if (dccp_msk(sk)->dccpms_send_ack_vector) {
250                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
251                 dp->dccps_hc_rx_ackvec = NULL;
252         }
253         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
254         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
255         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
256
257         /* clean up feature negotiation state */
258         dccp_feat_clean(sk);
259
260         return 0;
261 }
262
263 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
264
265 static inline int dccp_listen_start(struct sock *sk)
266 {
267         struct dccp_sock *dp = dccp_sk(sk);
268
269         dp->dccps_role = DCCP_ROLE_LISTEN;
270         /*
271          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
272          * before calling listen()
273          */
274         if (dccp_service_not_initialized(sk))
275                 return -EPROTO;
276         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
277 }
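/*
 * Illustrative userspace sketch, not part of this file (the port and the
 * service code 42 are placeholders): the check above means a server is
 * expected to do roughly
 *
 *	struct sockaddr_in sin = { .sin_family = AF_INET,
 *				   .sin_port   = htons(4200) };
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(fd, 5);
 *
 * before listen(); otherwise dccp_listen_start() above fails with -EPROTO.
 */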
278
279 int dccp_disconnect(struct sock *sk, int flags)
280 {
281         struct inet_connection_sock *icsk = inet_csk(sk);
282         struct inet_sock *inet = inet_sk(sk);
283         int err = 0;
284         const int old_state = sk->sk_state;
285
286         if (old_state != DCCP_CLOSED)
287                 dccp_set_state(sk, DCCP_CLOSED);
288
289         /* ABORT function of RFC793 */
290         if (old_state == DCCP_LISTEN) {
291                 inet_csk_listen_stop(sk);
292         /* FIXME: do the active reset thing */
293         } else if (old_state == DCCP_REQUESTING)
294                 sk->sk_err = ECONNRESET;
295
296         dccp_clear_xmit_timers(sk);
297         __skb_queue_purge(&sk->sk_receive_queue);
298         if (sk->sk_send_head != NULL) {
299                 __kfree_skb(sk->sk_send_head);
300                 sk->sk_send_head = NULL;
301         }
302
303         inet->dport = 0;
304
305         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
306                 inet_reset_saddr(sk);
307
308         sk->sk_shutdown = 0;
309         sock_reset_flag(sk, SOCK_DONE);
310
311         icsk->icsk_backoff = 0;
312         inet_csk_delack_init(sk);
313         __sk_dst_reset(sk);
314
315         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
316
317         sk->sk_error_report(sk);
318         return err;
319 }
320
321 EXPORT_SYMBOL_GPL(dccp_disconnect);
322
323 /*
324  *      Wait for a DCCP event.
325  *
326  *      Note that we don't need to lock the socket, as the upper poll layers
327  *      take care of normal races (between the test and the event) and we don't
328  *      go look at any of the socket buffers directly.
329  */
330 unsigned int dccp_poll(struct file *file, struct socket *sock,
331                        poll_table *wait)
332 {
333         unsigned int mask;
334         struct sock *sk = sock->sk;
335
336         poll_wait(file, sk->sk_sleep, wait);
337         if (sk->sk_state == DCCP_LISTEN)
338                 return inet_csk_listen_poll(sk);
339
340         /* Socket is not locked. We are protected from async events
341            by poll logic and correct handling of state changes
342            made by other threads is impossible in any case.
343          */
344
345         mask = 0;
346         if (sk->sk_err)
347                 mask = POLLERR;
348
349         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
350                 mask |= POLLHUP;
351         if (sk->sk_shutdown & RCV_SHUTDOWN)
352                 mask |= POLLIN | POLLRDNORM;
353
354         /* Connected? */
355         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
356                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
357                         mask |= POLLIN | POLLRDNORM;
358
359                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
360                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
361                                 mask |= POLLOUT | POLLWRNORM;
362                         } else {  /* send SIGIO later */
363                                 set_bit(SOCK_ASYNC_NOSPACE,
364                                         &sk->sk_socket->flags);
365                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
366
367                                 /* Race breaker. If space is freed after
368                                  * wspace test but before the flags are set,
369                                  * IO signal will be lost.
370                                  */
371                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
372                                         mask |= POLLOUT | POLLWRNORM;
373                         }
374                 }
375         }
376         return mask;
377 }
378
379 EXPORT_SYMBOL_GPL(dccp_poll);
380
381 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
382 {
383         dccp_pr_debug("entry\n");
384         return -ENOIOCTLCMD;
385 }
386
387 EXPORT_SYMBOL_GPL(dccp_ioctl);
388
389 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
390                                    char __user *optval, int optlen)
391 {
392         struct dccp_sock *dp = dccp_sk(sk);
393         struct dccp_service_list *sl = NULL;
394
395         if (service == DCCP_SERVICE_INVALID_VALUE || 
396             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
397                 return -EINVAL;
398
399         if (optlen > sizeof(service)) {
400                 sl = kmalloc(optlen, GFP_KERNEL);
401                 if (sl == NULL)
402                         return -ENOMEM;
403
404                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
405                 if (copy_from_user(sl->dccpsl_list,
406                                    optval + sizeof(service),
407                                    optlen - sizeof(service)) ||
408                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
409                         kfree(sl);
410                         return -EFAULT;
411                 }
412         }
413
414         lock_sock(sk);
415         dp->dccps_service = service;
416
417         kfree(dp->dccps_service_list);
418
419         dp->dccps_service_list = sl;
420         release_sock(sk);
421         return 0;
422 }
423
424 /* byte 1 is feature.  the rest is the preference list */
425 static int dccp_setsockopt_change(struct sock *sk, int type,
426                                   struct dccp_so_feat __user *optval)
427 {
428         struct dccp_so_feat opt;
429         u8 *val;
430         int rc;
431
432         if (copy_from_user(&opt, optval, sizeof(opt)))
433                 return -EFAULT;
434
435         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
436         if (!val)
437                 return -ENOMEM;
438
439         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
440                 rc = -EFAULT;
441                 goto out_free_val;
442         }
443
444         rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
445                               GFP_KERNEL);
446         if (rc)
447                 goto out_free_val;
448
449 out:
450         return rc;
451
452 out_free_val:
453         kfree(val);
454         goto out;
455 }
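/*
 * Illustrative userspace sketch, not part of this file (the CCID preference
 * list is an assumption about what the peer offers): DCCP_SOCKOPT_CHANGE_L/_R
 * take a struct dccp_so_feat, where dccpsf_feat selects the feature and
 * dccpsf_val points at the preference list, most preferred value first.
 * E.g. asking the local side to negotiate CCID 3, falling back to CCID 2:
 *
 *	unsigned char prefs[] = { 3, 2 };
 *	struct dccp_so_feat opt = {
 *		.dccpsf_feat = DCCPF_CCID,
 *		.dccpsf_val  = prefs,
 *		.dccpsf_len  = sizeof(prefs),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &opt, sizeof(opt));
 */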
456
457 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
458                 char __user *optval, int optlen)
459 {
460         struct dccp_sock *dp;
461         int err;
462         int val;
463
464         if (optlen < sizeof(int))
465                 return -EINVAL;
466
467         if (get_user(val, (int __user *)optval))
468                 return -EFAULT;
469
470         if (optname == DCCP_SOCKOPT_SERVICE)
471                 return dccp_setsockopt_service(sk, val, optval, optlen);
472
473         lock_sock(sk);
474         dp = dccp_sk(sk);
475         err = 0;
476
477         switch (optname) {
478         case DCCP_SOCKOPT_PACKET_SIZE:
479                 dp->dccps_packet_size = val;
480                 break;
481
482         case DCCP_SOCKOPT_CHANGE_L:
483                 if (optlen != sizeof(struct dccp_so_feat))
484                         err = -EINVAL;
485                 else
486                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
487                                                      (struct dccp_so_feat *)
488                                                      optval);
489                 break;
490
491         case DCCP_SOCKOPT_CHANGE_R:
492                 if (optlen != sizeof(struct dccp_so_feat))
493                         err = -EINVAL;
494                 else
495                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
496                                                      (struct dccp_so_feat *)
497                                                      optval);
498                 break;
499
500         default:
501                 err = -ENOPROTOOPT;
502                 break;
503         }
504         
505         release_sock(sk);
506         return err;
507 }
508
509 int dccp_setsockopt(struct sock *sk, int level, int optname,
510                     char __user *optval, int optlen)
511 {
512         if (level != SOL_DCCP)
513                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
514                                                              optname, optval,
515                                                              optlen);
516         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
517 }
518
519 EXPORT_SYMBOL_GPL(dccp_setsockopt);
520
521 #ifdef CONFIG_COMPAT
522 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
523                            char __user *optval, int optlen)
524 {
525         if (level != SOL_DCCP)
526                 return inet_csk_compat_setsockopt(sk, level, optname,
527                                                   optval, optlen);
528         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
529 }
530
531 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
532 #endif
533
534 static int dccp_getsockopt_service(struct sock *sk, int len,
535                                    __be32 __user *optval,
536                                    int __user *optlen)
537 {
538         const struct dccp_sock *dp = dccp_sk(sk);
539         const struct dccp_service_list *sl;
540         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
541
542         lock_sock(sk);
543         if (dccp_service_not_initialized(sk))
544                 goto out;
545
546         if ((sl = dp->dccps_service_list) != NULL) {
547                 slen = sl->dccpsl_nr * sizeof(u32);
548                 total_len += slen;
549         }
550
551         err = -EINVAL;
552         if (total_len > len)
553                 goto out;
554
555         err = 0;
556         if (put_user(total_len, optlen) ||
557             put_user(dp->dccps_service, optval) ||
558             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
559                 err = -EFAULT;
560 out:
561         release_sock(sk);
562         return err;
563 }
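/*
 * Illustrative userspace sketch, not part of this file: the layout produced
 * above is the __be32 service code followed by the optional service list, so
 * a caller can read it back with something like
 *
 *	uint32_t codes[DCCP_SERVICE_LIST_MAX_LEN + 1];
 *	socklen_t len = sizeof(codes);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, &len) == 0) {
 *		// codes[0] is the service code, codes[1..] the service list,
 *		// len reports how many bytes were actually filled in
 *	}
 */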
564
565 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
566                     char __user *optval, int __user *optlen)
567 {
568         struct dccp_sock *dp;
569         int val, len;
570
571         if (get_user(len, optlen))
572                 return -EFAULT;
573
574         if (len < sizeof(int))
575                 return -EINVAL;
576
577         dp = dccp_sk(sk);
578
579         switch (optname) {
580         case DCCP_SOCKOPT_PACKET_SIZE:
581                 val = dp->dccps_packet_size;
582                 len = sizeof(dp->dccps_packet_size);
583                 break;
584         case DCCP_SOCKOPT_SERVICE:
585                 return dccp_getsockopt_service(sk, len,
586                                                (__be32 __user *)optval, optlen);
587         case 128 ... 191:
588                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
589                                              len, (u32 __user *)optval, optlen);
590         case 192 ... 255:
591                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
592                                              len, (u32 __user *)optval, optlen);
593         default:
594                 return -ENOPROTOOPT;
595         }
596
597         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
598                 return -EFAULT;
599
600         return 0;
601 }
602
603 int dccp_getsockopt(struct sock *sk, int level, int optname,
604                     char __user *optval, int __user *optlen)
605 {
606         if (level != SOL_DCCP)
607                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
608                                                              optname, optval,
609                                                              optlen);
610         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
611 }
612
613 EXPORT_SYMBOL_GPL(dccp_getsockopt);
614
615 #ifdef CONFIG_COMPAT
616 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
617                            char __user *optval, int __user *optlen)
618 {
619         if (level != SOL_DCCP)
620                 return inet_csk_compat_getsockopt(sk, level, optname,
621                                                   optval, optlen);
622         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
623 }
624
625 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
626 #endif
627
628 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
629                  size_t len)
630 {
631         const struct dccp_sock *dp = dccp_sk(sk);
632         const int flags = msg->msg_flags;
633         const int noblock = flags & MSG_DONTWAIT;
634         struct sk_buff *skb;
635         int rc, size;
636         long timeo;
637
638         if (len > dp->dccps_mss_cache)
639                 return -EMSGSIZE;
640
641         lock_sock(sk);
642         timeo = sock_sndtimeo(sk, noblock);
643
644         /*
645          * We have to use sk_stream_wait_connect here to set sk_write_pending,
646          * so that the trick in dccp_rcv_request_sent_state_process works.
647          */
648         /* Wait for a connection to finish. */
649         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
650                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
651                         goto out_release;
652
653         size = sk->sk_prot->max_header + len;
654         release_sock(sk);
655         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
656         lock_sock(sk);
657         if (skb == NULL)
658                 goto out_release;
659
660         skb_reserve(skb, sk->sk_prot->max_header);
661         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
662         if (rc != 0)
663                 goto out_discard;
664
665         rc = dccp_write_xmit(sk, skb, &timeo);
666         /*
667          * XXX we don't use sk_write_queue, so just discard the packet.
668          *     Current plan however is to _use_ sk_write_queue with
669          *     an algorithm similar to tcp_sendmsg, where the main difference
670          *     is that in DCCP we have to respect packet boundaries, so
671          *     no coalescing of skbs.
672          *
673          *     This bug was _quickly_ found & fixed by just looking at an OSTRA
674          *     generated callgraph 8) -acme
675          */
676 out_release:
677         release_sock(sk);
678         return rc ? : len;
679 out_discard:
680         kfree_skb(skb);
681         goto out_release;
682 }
683
684 EXPORT_SYMBOL_GPL(dccp_sendmsg);
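/*
 * Illustrative userspace sketch, not part of this file: DCCP preserves packet
 * boundaries, so each send() handled by dccp_sendmsg() above becomes a single
 * packet and must fit within dccps_mss_cache, otherwise -EMSGSIZE is returned:
 *
 *	char payload[256] = { 0 };
 *
 *	if (send(fd, payload, sizeof(payload), 0) < 0 && errno == EMSGSIZE)
 *		fprintf(stderr, "payload exceeds the DCCP MSS\n");
 */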
685
686 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
687                  size_t len, int nonblock, int flags, int *addr_len)
688 {
689         const struct dccp_hdr *dh;
690         long timeo;
691
692         lock_sock(sk);
693
694         if (sk->sk_state == DCCP_LISTEN) {
695                 len = -ENOTCONN;
696                 goto out;
697         }
698
699         timeo = sock_rcvtimeo(sk, nonblock);
700
701         do {
702                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
703
704                 if (skb == NULL)
705                         goto verify_sock_status;
706
707                 dh = dccp_hdr(skb);
708
709                 if (dh->dccph_type == DCCP_PKT_DATA ||
710                     dh->dccph_type == DCCP_PKT_DATAACK)
711                         goto found_ok_skb;
712
713                 if (dh->dccph_type == DCCP_PKT_RESET ||
714                     dh->dccph_type == DCCP_PKT_CLOSE) {
715                         dccp_pr_debug("found fin ok!\n");
716                         len = 0;
717                         goto found_fin_ok;
718                 }
719                 dccp_pr_debug("packet_type=%s\n",
720                               dccp_packet_name(dh->dccph_type));
721                 sk_eat_skb(sk, skb);
722 verify_sock_status:
723                 if (sock_flag(sk, SOCK_DONE)) {
724                         len = 0;
725                         break;
726                 }
727
728                 if (sk->sk_err) {
729                         len = sock_error(sk);
730                         break;
731                 }
732
733                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
734                         len = 0;
735                         break;
736                 }
737
738                 if (sk->sk_state == DCCP_CLOSED) {
739                         if (!sock_flag(sk, SOCK_DONE)) {
740                                 /* This occurs when the user tries to read
741                                  * from a never-connected socket.
742                                  */
743                                 len = -ENOTCONN;
744                                 break;
745                         }
746                         len = 0;
747                         break;
748                 }
749
750                 if (!timeo) {
751                         len = -EAGAIN;
752                         break;
753                 }
754
755                 if (signal_pending(current)) {
756                         len = sock_intr_errno(timeo);
757                         break;
758                 }
759
760                 sk_wait_data(sk, &timeo);
761                 continue;
762         found_ok_skb:
763                 if (len > skb->len)
764                         len = skb->len;
765                 else if (len < skb->len)
766                         msg->msg_flags |= MSG_TRUNC;
767
768                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
769                         /* Exception. Bailout! */
770                         len = -EFAULT;
771                         break;
772                 }
773         found_fin_ok:
774                 if (!(flags & MSG_PEEK))
775                         sk_eat_skb(sk, skb);
776                 break;
777         } while (1);
778 out:
779         release_sock(sk);
780         return len;
781 }
782
783 EXPORT_SYMBOL_GPL(dccp_recvmsg);
784
785 int inet_dccp_listen(struct socket *sock, int backlog)
786 {
787         struct sock *sk = sock->sk;
788         unsigned char old_state;
789         int err;
790
791         lock_sock(sk);
792
793         err = -EINVAL;
794         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
795                 goto out;
796
797         old_state = sk->sk_state;
798         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
799                 goto out;
800
801         /* Really, if the socket is already in listen state
802          * we can only allow the backlog to be adjusted.
803          */
804         if (old_state != DCCP_LISTEN) {
805                 /*
806                  * FIXME: here it probably should be sk->sk_prot->listen_start
807                  * see tcp_listen_start
808                  */
809                 err = dccp_listen_start(sk);
810                 if (err)
811                         goto out;
812         }
813         sk->sk_max_ack_backlog = backlog;
814         err = 0;
815
816 out:
817         release_sock(sk);
818         return err;
819 }
820
821 EXPORT_SYMBOL_GPL(inet_dccp_listen);
822
823 static const unsigned char dccp_new_state[] = {
824         /* current state:   new state:      action:     */
825         [0]               = DCCP_CLOSED,
826         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
827         [DCCP_REQUESTING] = DCCP_CLOSED,
828         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
829         [DCCP_LISTEN]     = DCCP_CLOSED,
830         [DCCP_RESPOND]    = DCCP_CLOSED,
831         [DCCP_CLOSING]    = DCCP_CLOSED,
832         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
833         [DCCP_CLOSED]     = DCCP_CLOSED,
834 };
835
836 static int dccp_close_state(struct sock *sk)
837 {
838         const int next = dccp_new_state[sk->sk_state];
839         const int ns = next & DCCP_STATE_MASK;
840
841         if (ns != sk->sk_state)
842                 dccp_set_state(sk, ns);
843
844         return next & DCCP_ACTION_FIN;
845 }
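/*
 * Worked example of the table above (derived from the code, no new
 * behaviour): dccp_new_state[DCCP_OPEN] is DCCP_CLOSING | DCCP_ACTION_FIN,
 * so for an OPEN socket dccp_close_state() computes
 *
 *	next = DCCP_CLOSING | DCCP_ACTION_FIN;
 *	ns   = next & DCCP_STATE_MASK;		// DCCP_CLOSING
 *
 * moves the socket to CLOSING and returns non-zero (next & DCCP_ACTION_FIN),
 * which makes dccp_close() below call dccp_send_close().
 */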
846
847 void dccp_close(struct sock *sk, long timeout)
848 {
849         struct sk_buff *skb;
850
851         lock_sock(sk);
852
853         sk->sk_shutdown = SHUTDOWN_MASK;
854
855         if (sk->sk_state == DCCP_LISTEN) {
856                 dccp_set_state(sk, DCCP_CLOSED);
857
858                 /* Special case. */
859                 inet_csk_listen_stop(sk);
860
861                 goto adjudge_to_death;
862         }
863
864         /*
865          * We need to flush the recv. buffs.  We do this only on the
866          * descriptor close, not protocol-sourced closes, because the
867          * reader process may not have drained the data yet!
868          */
869         /* FIXME: check for unread data */
870         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
871                 __kfree_skb(skb);
872         }
873
874         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
875                 /* Check zero linger _after_ checking for unread data. */
876                 sk->sk_prot->disconnect(sk, 0);
877         } else if (dccp_close_state(sk)) {
878                 dccp_send_close(sk, 1);
879         }
880
881         sk_stream_wait_close(sk, timeout);
882
883 adjudge_to_death:
884         /*
885          * It is the last release_sock in its life. It will remove backlog.
886          */
887         release_sock(sk);
888         /*
889          * Now socket is owned by kernel and we acquire BH lock
890          * to finish close. No need to check for user refs.
891          */
892         local_bh_disable();
893         bh_lock_sock(sk);
894         BUG_TRAP(!sock_owned_by_user(sk));
895
896         sock_hold(sk);
897         sock_orphan(sk);
898
899         /*
900          * The last release_sock may have processed the CLOSE or RESET
901          * packet moving sock to CLOSED state, if not we have to fire
902          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
903          * in draft-ietf-dccp-spec-11. -acme
904          */
905         if (sk->sk_state == DCCP_CLOSING) {
906                 /* FIXME: should start at 2 * RTT */
907                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
908                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
909                                           inet_csk(sk)->icsk_rto,
910                                           DCCP_RTO_MAX);
911 #if 0
912                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
913                 dccp_set_state(sk, DCCP_CLOSED);
914 #endif
915         }
916
917         atomic_inc(sk->sk_prot->orphan_count);
918         if (sk->sk_state == DCCP_CLOSED)
919                 inet_csk_destroy_sock(sk);
920
921         /* Otherwise, socket is reprieved until protocol close. */
922
923         bh_unlock_sock(sk);
924         local_bh_enable();
925         sock_put(sk);
926 }
927
928 EXPORT_SYMBOL_GPL(dccp_close);
929
930 void dccp_shutdown(struct sock *sk, int how)
931 {
932         dccp_pr_debug("entry\n");
933 }
934
935 EXPORT_SYMBOL_GPL(dccp_shutdown);
936
937 static int __init dccp_mib_init(void)
938 {
939         int rc = -ENOMEM;
940
941         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
942         if (dccp_statistics[0] == NULL)
943                 goto out;
944
945         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
946         if (dccp_statistics[1] == NULL)
947                 goto out_free_one;
948
949         rc = 0;
950 out:
951         return rc;
952 out_free_one:
953         free_percpu(dccp_statistics[0]);
954         dccp_statistics[0] = NULL;
955         goto out;
956
957 }
958
959 static void dccp_mib_exit(void)
960 {
961         free_percpu(dccp_statistics[0]);
962         free_percpu(dccp_statistics[1]);
963         dccp_statistics[0] = dccp_statistics[1] = NULL;
964 }
965
966 static int thash_entries;
967 module_param(thash_entries, int, 0444);
968 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
969
970 #ifdef CONFIG_IP_DCCP_DEBUG
971 int dccp_debug;
972 module_param(dccp_debug, int, 0444);
973 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
974
975 EXPORT_SYMBOL_GPL(dccp_debug);
976 #endif
977
978 static int __init dccp_init(void)
979 {
980         unsigned long goal;
981         int ehash_order, bhash_order, i;
982         int rc = -ENOBUFS;
983
984         dccp_hashinfo.bind_bucket_cachep =
985                 kmem_cache_create("dccp_bind_bucket",
986                                   sizeof(struct inet_bind_bucket), 0,
987                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
988         if (!dccp_hashinfo.bind_bucket_cachep)
989                 goto out;
990
991         /*
992          * Size and allocate the main established and bind bucket
993          * hash tables.
994          *
995          * The methodology is similar to that of the buffer cache.
996          */
997         if (num_physpages >= (128 * 1024))
998                 goal = num_physpages >> (21 - PAGE_SHIFT);
999         else
1000                 goal = num_physpages >> (23 - PAGE_SHIFT);
1001
1002         if (thash_entries)
1003                 goal = (thash_entries *
1004                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1005         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1006                 ;
1007         do {
1008                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1009                                         sizeof(struct inet_ehash_bucket);
1010                 dccp_hashinfo.ehash_size >>= 1;
1011                 while (dccp_hashinfo.ehash_size &
1012                        (dccp_hashinfo.ehash_size - 1))
1013                         dccp_hashinfo.ehash_size--;
1014                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1015                         __get_free_pages(GFP_ATOMIC, ehash_order);
1016         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
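        /*
         * Worked example of the sizing above (numbers illustrative only): if
         * __get_free_pages() returns room for 3000 buckets, the >>= 1 halves
         * that to 1500, since each hash slot uses two chains (the upper half
         * holds timewait sockets, hence the << 1 in the init loop below), and
         * the while loop then rounds down to a power of two, leaving
         * ehash_size = 1024.
         */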
1017
1018         if (!dccp_hashinfo.ehash) {
1019                 printk(KERN_CRIT "Failed to allocate DCCP "
1020                                  "established hash table\n");
1021                 goto out_free_bind_bucket_cachep;
1022         }
1023
1024         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1025                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1026                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1027         }
1028
1029         bhash_order = ehash_order;
1030
1031         do {
1032                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1033                                         sizeof(struct inet_bind_hashbucket);
1034                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1035                     bhash_order > 0)
1036                         continue;
1037                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1038                         __get_free_pages(GFP_ATOMIC, bhash_order);
1039         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1040
1041         if (!dccp_hashinfo.bhash) {
1042                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1043                 goto out_free_dccp_ehash;
1044         }
1045
1046         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1047                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1048                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1049         }
1050
1051         rc = dccp_mib_init();
1052         if (rc)
1053                 goto out_free_dccp_bhash;
1054
1055         rc = dccp_ackvec_init();
1056         if (rc)
1057                 goto out_free_dccp_mib;
1058
1059         rc = dccp_sysctl_init();
1060         if (rc)
1061                 goto out_ackvec_exit;
1062 out:
1063         return rc;
1064 out_ackvec_exit:
1065         dccp_ackvec_exit();
1066 out_free_dccp_mib:
1067         dccp_mib_exit();
1068 out_free_dccp_bhash:
1069         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1070         dccp_hashinfo.bhash = NULL;
1071 out_free_dccp_ehash:
1072         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1073         dccp_hashinfo.ehash = NULL;
1074 out_free_bind_bucket_cachep:
1075         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1076         dccp_hashinfo.bind_bucket_cachep = NULL;
1077         goto out;
1078 }
1079
1080 static void __exit dccp_fini(void)
1081 {
1082         dccp_mib_exit();
1083         free_pages((unsigned long)dccp_hashinfo.bhash,
1084                    get_order(dccp_hashinfo.bhash_size *
1085                              sizeof(struct inet_bind_hashbucket)));
1086         free_pages((unsigned long)dccp_hashinfo.ehash,
1087                    get_order(dccp_hashinfo.ehash_size *
1088                              sizeof(struct inet_ehash_bucket)));
1089         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1090         dccp_ackvec_exit();
1091         dccp_sysctl_exit();
1092 }
1093
1094 module_init(dccp_init);
1095 module_exit(dccp_fini);
1096
1097 MODULE_LICENSE("GPL");
1098 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1099 MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");