[NET]: Size listen hash tables using backlog hint
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 void dccp_set_state(struct sock *sk, const int state)
56 {
57         const int oldstate = sk->sk_state;
58
59         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
60                       dccp_role(sk), sk,
61                       dccp_state_name(oldstate), dccp_state_name(state));
62         WARN_ON(state == oldstate);
63
64         switch (state) {
65         case DCCP_OPEN:
66                 if (oldstate != DCCP_OPEN)
67                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
68                 break;
69
70         case DCCP_CLOSED:
71                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(&dccp_hashinfo, sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 void dccp_done(struct sock *sk)
93 {
94         dccp_set_state(sk, DCCP_CLOSED);
95         dccp_clear_xmit_timers(sk);
96
97         sk->sk_shutdown = SHUTDOWN_MASK;
98
99         if (!sock_flag(sk, SOCK_DEAD))
100                 sk->sk_state_change(sk);
101         else
102                 inet_csk_destroy_sock(sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_done);
106
107 const char *dccp_packet_name(const int type)
108 {
109         static const char *dccp_packet_names[] = {
110                 [DCCP_PKT_REQUEST]  = "REQUEST",
111                 [DCCP_PKT_RESPONSE] = "RESPONSE",
112                 [DCCP_PKT_DATA]     = "DATA",
113                 [DCCP_PKT_ACK]      = "ACK",
114                 [DCCP_PKT_DATAACK]  = "DATAACK",
115                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
116                 [DCCP_PKT_CLOSE]    = "CLOSE",
117                 [DCCP_PKT_RESET]    = "RESET",
118                 [DCCP_PKT_SYNC]     = "SYNC",
119                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
120         };
121
122         if (type >= DCCP_NR_PKT_TYPES)
123                 return "INVALID";
124         else
125                 return dccp_packet_names[type];
126 }
127
128 EXPORT_SYMBOL_GPL(dccp_packet_name);
129
130 const char *dccp_state_name(const int state)
131 {
132         static char *dccp_state_names[] = {
133         [DCCP_OPEN]       = "OPEN",
134         [DCCP_REQUESTING] = "REQUESTING",
135         [DCCP_PARTOPEN]   = "PARTOPEN",
136         [DCCP_LISTEN]     = "LISTEN",
137         [DCCP_RESPOND]    = "RESPOND",
138         [DCCP_CLOSING]    = "CLOSING",
139         [DCCP_TIME_WAIT]  = "TIME_WAIT",
140         [DCCP_CLOSED]     = "CLOSED",
141         };
142
143         if (state >= DCCP_MAX_STATES)
144                 return "INVALID STATE!";
145         else
146                 return dccp_state_names[state];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_state_name);
150
151 void dccp_hash(struct sock *sk)
152 {
153         inet_hash(&dccp_hashinfo, sk);
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_hash);
157
158 void dccp_unhash(struct sock *sk)
159 {
160         inet_unhash(&dccp_hashinfo, sk);
161 }
162
163 EXPORT_SYMBOL_GPL(dccp_unhash);
164
165 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
166 {
167         struct dccp_sock *dp = dccp_sk(sk);
168         struct dccp_minisock *dmsk = dccp_msk(sk);
169         struct inet_connection_sock *icsk = inet_csk(sk);
170
171         dccp_minisock_init(&dp->dccps_minisock);
172         do_gettimeofday(&dp->dccps_epoch);
173
174         /*
175          * FIXME: We're hardcoding the CCID, and doing this at this point makes
176          * the listening (master) sock get CCID control blocks, which is not
177          * necessary, but for now, to not mess with the test userspace apps,
178          * lets leave it here, later the real solution is to do this in a
179          * setsockopt(CCIDs-I-want/accept). -acme
180          */
181         if (likely(ctl_sock_initialized)) {
182                 int rc = dccp_feat_init(dmsk);
183
184                 if (rc)
185                         return rc;
186
187                 if (dmsk->dccpms_send_ack_vector) {
188                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
189                         if (dp->dccps_hc_rx_ackvec == NULL)
190                                 return -ENOMEM;
191                 }
192                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
193                                                       sk, GFP_KERNEL);
194                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
195                                                       sk, GFP_KERNEL);
196                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
197                              dp->dccps_hc_tx_ccid == NULL)) {
198                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
199                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
200                         if (dmsk->dccpms_send_ack_vector) {
201                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                                 dp->dccps_hc_rx_ackvec = NULL;
203                         }
204                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
205                         return -ENOMEM;
206                 }
207         } else {
208                 /* control socket doesn't need feat nego */
209                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
210                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
211         }
212
213         dccp_init_xmit_timers(sk);
214         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
215         sk->sk_state            = DCCP_CLOSED;
216         sk->sk_write_space      = dccp_write_space;
217         icsk->icsk_sync_mss     = dccp_sync_mss;
218         dp->dccps_mss_cache     = 536;
219         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
220         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
221         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
222
223         return 0;
224 }
225
226 EXPORT_SYMBOL_GPL(dccp_init_sock);
227
228 int dccp_destroy_sock(struct sock *sk)
229 {
230         struct dccp_sock *dp = dccp_sk(sk);
231         struct dccp_minisock *dmsk = dccp_msk(sk);
232
233         /*
234          * DCCP doesn't use sk_write_queue, just sk_send_head
235          * for retransmissions
236          */
237         if (sk->sk_send_head != NULL) {
238                 kfree_skb(sk->sk_send_head);
239                 sk->sk_send_head = NULL;
240         }
241
242         /* Clean up a referenced DCCP bind bucket. */
243         if (inet_csk(sk)->icsk_bind_hash != NULL)
244                 inet_put_port(&dccp_hashinfo, sk);
245
246         kfree(dp->dccps_service_list);
247         dp->dccps_service_list = NULL;
248
249         if (dmsk->dccpms_send_ack_vector) {
250                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
251                 dp->dccps_hc_rx_ackvec = NULL;
252         }
253         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
254         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
255         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
256
257         /* clean up feature negotiation state */
258         dccp_feat_clean(dmsk);
259
260         return 0;
261 }
262
263 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
264
265 static inline int dccp_listen_start(struct sock *sk, int backlog)
266 {
267         struct dccp_sock *dp = dccp_sk(sk);
268
269         dp->dccps_role = DCCP_ROLE_LISTEN;
270         return inet_csk_listen_start(sk, backlog);
271 }
272
273 int dccp_disconnect(struct sock *sk, int flags)
274 {
275         struct inet_connection_sock *icsk = inet_csk(sk);
276         struct inet_sock *inet = inet_sk(sk);
277         int err = 0;
278         const int old_state = sk->sk_state;
279
280         if (old_state != DCCP_CLOSED)
281                 dccp_set_state(sk, DCCP_CLOSED);
282
283         /* ABORT function of RFC793 */
284         if (old_state == DCCP_LISTEN) {
285                 inet_csk_listen_stop(sk);
286         /* FIXME: do the active reset thing */
287         } else if (old_state == DCCP_REQUESTING)
288                 sk->sk_err = ECONNRESET;
289
290         dccp_clear_xmit_timers(sk);
291         __skb_queue_purge(&sk->sk_receive_queue);
292         if (sk->sk_send_head != NULL) {
293                 __kfree_skb(sk->sk_send_head);
294                 sk->sk_send_head = NULL;
295         }
296
297         inet->dport = 0;
298
299         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
300                 inet_reset_saddr(sk);
301
302         sk->sk_shutdown = 0;
303         sock_reset_flag(sk, SOCK_DONE);
304
305         icsk->icsk_backoff = 0;
306         inet_csk_delack_init(sk);
307         __sk_dst_reset(sk);
308
309         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
310
311         sk->sk_error_report(sk);
312         return err;
313 }
314
315 EXPORT_SYMBOL_GPL(dccp_disconnect);
316
317 /*
318  *      Wait for a DCCP event.
319  *
320  *      Note that we don't need to lock the socket, as the upper poll layers
321  *      take care of normal races (between the test and the event) and we don't
322  *      go look at any of the socket buffers directly.
323  */
324 unsigned int dccp_poll(struct file *file, struct socket *sock,
325                        poll_table *wait)
326 {
327         unsigned int mask;
328         struct sock *sk = sock->sk;
329
330         poll_wait(file, sk->sk_sleep, wait);
331         if (sk->sk_state == DCCP_LISTEN)
332                 return inet_csk_listen_poll(sk);
333
334         /* Socket is not locked. We are protected from async events
335            by poll logic and correct handling of state changes
336            made by another threads is impossible in any case.
337          */
338
339         mask = 0;
340         if (sk->sk_err)
341                 mask = POLLERR;
342
343         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
344                 mask |= POLLHUP;
345         if (sk->sk_shutdown & RCV_SHUTDOWN)
346                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
347
348         /* Connected? */
349         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
350                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
351                         mask |= POLLIN | POLLRDNORM;
352
353                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
354                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
355                                 mask |= POLLOUT | POLLWRNORM;
356                         } else {  /* send SIGIO later */
357                                 set_bit(SOCK_ASYNC_NOSPACE,
358                                         &sk->sk_socket->flags);
359                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
360
361                                 /* Race breaker. If space is freed after
362                                  * wspace test but before the flags are set,
363                                  * IO signal will be lost.
364                                  */
365                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
366                                         mask |= POLLOUT | POLLWRNORM;
367                         }
368                 }
369         }
370         return mask;
371 }
372
373 EXPORT_SYMBOL_GPL(dccp_poll);
374
375 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
376 {
377         dccp_pr_debug("entry\n");
378         return -ENOIOCTLCMD;
379 }
380
381 EXPORT_SYMBOL_GPL(dccp_ioctl);
382
383 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
384                                    char __user *optval, int optlen)
385 {
386         struct dccp_sock *dp = dccp_sk(sk);
387         struct dccp_service_list *sl = NULL;
388
389         if (service == DCCP_SERVICE_INVALID_VALUE || 
390             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
391                 return -EINVAL;
392
393         if (optlen > sizeof(service)) {
394                 sl = kmalloc(optlen, GFP_KERNEL);
395                 if (sl == NULL)
396                         return -ENOMEM;
397
398                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
399                 if (copy_from_user(sl->dccpsl_list,
400                                    optval + sizeof(service),
401                                    optlen - sizeof(service)) ||
402                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
403                         kfree(sl);
404                         return -EFAULT;
405                 }
406         }
407
408         lock_sock(sk);
409         dp->dccps_service = service;
410
411         kfree(dp->dccps_service_list);
412
413         dp->dccps_service_list = sl;
414         release_sock(sk);
415         return 0;
416 }
417
418 /* byte 1 is feature.  the rest is the preference list */
419 static int dccp_setsockopt_change(struct sock *sk, int type,
420                                   struct dccp_so_feat __user *optval)
421 {
422         struct dccp_so_feat opt;
423         u8 *val;
424         int rc;
425
426         if (copy_from_user(&opt, optval, sizeof(opt)))
427                 return -EFAULT;
428
429         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
430         if (!val)
431                 return -ENOMEM;
432
433         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
434                 rc = -EFAULT;
435                 goto out_free_val;
436         }
437
438         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
439                               val, opt.dccpsf_len, GFP_KERNEL);
440         if (rc)
441                 goto out_free_val;
442
443 out:
444         return rc;
445
446 out_free_val:
447         kfree(val);
448         goto out;
449 }
450
451 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
452                 char __user *optval, int optlen)
453 {
454         struct dccp_sock *dp;
455         int err;
456         int val;
457
458         if (optlen < sizeof(int))
459                 return -EINVAL;
460
461         if (get_user(val, (int __user *)optval))
462                 return -EFAULT;
463
464         if (optname == DCCP_SOCKOPT_SERVICE)
465                 return dccp_setsockopt_service(sk, val, optval, optlen);
466
467         lock_sock(sk);
468         dp = dccp_sk(sk);
469         err = 0;
470
471         switch (optname) {
472         case DCCP_SOCKOPT_PACKET_SIZE:
473                 dp->dccps_packet_size = val;
474                 break;
475
476         case DCCP_SOCKOPT_CHANGE_L:
477                 if (optlen != sizeof(struct dccp_so_feat))
478                         err = -EINVAL;
479                 else
480                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
481                                                      (struct dccp_so_feat __user *)
482                                                      optval);
483                 break;
484
485         case DCCP_SOCKOPT_CHANGE_R:
486                 if (optlen != sizeof(struct dccp_so_feat))
487                         err = -EINVAL;
488                 else
489                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
490                                                      (struct dccp_so_feat __user *)
491                                                      optval);
492                 break;
493
494         default:
495                 err = -ENOPROTOOPT;
496                 break;
497         }
498         
499         release_sock(sk);
500         return err;
501 }
502
503 int dccp_setsockopt(struct sock *sk, int level, int optname,
504                     char __user *optval, int optlen)
505 {
506         if (level != SOL_DCCP)
507                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
508                                                              optname, optval,
509                                                              optlen);
510         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
511 }
512
513 EXPORT_SYMBOL_GPL(dccp_setsockopt);
514
515 #ifdef CONFIG_COMPAT
516 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
517                            char __user *optval, int optlen)
518 {
519         if (level != SOL_DCCP)
520                 return inet_csk_compat_setsockopt(sk, level, optname,
521                                                   optval, optlen);
522         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
523 }
524
525 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
526 #endif
527
528 static int dccp_getsockopt_service(struct sock *sk, int len,
529                                    __be32 __user *optval,
530                                    int __user *optlen)
531 {
532         const struct dccp_sock *dp = dccp_sk(sk);
533         const struct dccp_service_list *sl;
534         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
535
536         lock_sock(sk);
537         if ((sl = dp->dccps_service_list) != NULL) {
538                 slen = sl->dccpsl_nr * sizeof(u32);
539                 total_len += slen;
540         }
541
542         err = -EINVAL;
543         if (total_len > len)
544                 goto out;
545
546         err = 0;
547         if (put_user(total_len, optlen) ||
548             put_user(dp->dccps_service, optval) ||
549             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
550                 err = -EFAULT;
551 out:
552         release_sock(sk);
553         return err;
554 }
555
556 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
557                     char __user *optval, int __user *optlen)
558 {
559         struct dccp_sock *dp;
560         int val, len;
561
562         if (get_user(len, optlen))
563                 return -EFAULT;
564
565         if (len < sizeof(int))
566                 return -EINVAL;
567
568         dp = dccp_sk(sk);
569
570         switch (optname) {
571         case DCCP_SOCKOPT_PACKET_SIZE:
572                 val = dp->dccps_packet_size;
573                 len = sizeof(dp->dccps_packet_size);
574                 break;
575         case DCCP_SOCKOPT_SERVICE:
576                 return dccp_getsockopt_service(sk, len,
577                                                (__be32 __user *)optval, optlen);
578         case 128 ... 191:
579                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
580                                              len, (u32 __user *)optval, optlen);
581         case 192 ... 255:
582                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
583                                              len, (u32 __user *)optval, optlen);
584         default:
585                 return -ENOPROTOOPT;
586         }
587
588         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
589                 return -EFAULT;
590
591         return 0;
592 }
593
594 int dccp_getsockopt(struct sock *sk, int level, int optname,
595                     char __user *optval, int __user *optlen)
596 {
597         if (level != SOL_DCCP)
598                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
599                                                              optname, optval,
600                                                              optlen);
601         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
602 }
603
604 EXPORT_SYMBOL_GPL(dccp_getsockopt);
605
606 #ifdef CONFIG_COMPAT
607 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
608                            char __user *optval, int __user *optlen)
609 {
610         if (level != SOL_DCCP)
611                 return inet_csk_compat_getsockopt(sk, level, optname,
612                                                   optval, optlen);
613         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
614 }
615
616 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
617 #endif
618
619 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
620                  size_t len)
621 {
622         const struct dccp_sock *dp = dccp_sk(sk);
623         const int flags = msg->msg_flags;
624         const int noblock = flags & MSG_DONTWAIT;
625         struct sk_buff *skb;
626         int rc, size;
627         long timeo;
628
629         if (len > dp->dccps_mss_cache)
630                 return -EMSGSIZE;
631
632         lock_sock(sk);
633         timeo = sock_sndtimeo(sk, noblock);
634
635         /*
636          * We have to use sk_stream_wait_connect here to set sk_write_pending,
637          * so that the trick in dccp_rcv_request_sent_state_process.
638          */
639         /* Wait for a connection to finish. */
640         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
641                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
642                         goto out_release;
643
644         size = sk->sk_prot->max_header + len;
645         release_sock(sk);
646         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
647         lock_sock(sk);
648         if (skb == NULL)
649                 goto out_release;
650
651         skb_reserve(skb, sk->sk_prot->max_header);
652         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
653         if (rc != 0)
654                 goto out_discard;
655
656         skb_queue_tail(&sk->sk_write_queue, skb);
657         dccp_write_xmit(sk,0);
658 out_release:
659         release_sock(sk);
660         return rc ? : len;
661 out_discard:
662         kfree_skb(skb);
663         goto out_release;
664 }
665
666 EXPORT_SYMBOL_GPL(dccp_sendmsg);
667
668 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
669                  size_t len, int nonblock, int flags, int *addr_len)
670 {
671         const struct dccp_hdr *dh;
672         long timeo;
673
674         lock_sock(sk);
675
676         if (sk->sk_state == DCCP_LISTEN) {
677                 len = -ENOTCONN;
678                 goto out;
679         }
680
681         timeo = sock_rcvtimeo(sk, nonblock);
682
683         do {
684                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
685
686                 if (skb == NULL)
687                         goto verify_sock_status;
688
689                 dh = dccp_hdr(skb);
690
691                 if (dh->dccph_type == DCCP_PKT_DATA ||
692                     dh->dccph_type == DCCP_PKT_DATAACK)
693                         goto found_ok_skb;
694
695                 if (dh->dccph_type == DCCP_PKT_RESET ||
696                     dh->dccph_type == DCCP_PKT_CLOSE) {
697                         dccp_pr_debug("found fin ok!\n");
698                         len = 0;
699                         goto found_fin_ok;
700                 }
701                 dccp_pr_debug("packet_type=%s\n",
702                               dccp_packet_name(dh->dccph_type));
703                 sk_eat_skb(sk, skb, 0);
704 verify_sock_status:
705                 if (sock_flag(sk, SOCK_DONE)) {
706                         len = 0;
707                         break;
708                 }
709
710                 if (sk->sk_err) {
711                         len = sock_error(sk);
712                         break;
713                 }
714
715                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
716                         len = 0;
717                         break;
718                 }
719
720                 if (sk->sk_state == DCCP_CLOSED) {
721                         if (!sock_flag(sk, SOCK_DONE)) {
722                                 /* This occurs when user tries to read
723                                  * from never connected socket.
724                                  */
725                                 len = -ENOTCONN;
726                                 break;
727                         }
728                         len = 0;
729                         break;
730                 }
731
732                 if (!timeo) {
733                         len = -EAGAIN;
734                         break;
735                 }
736
737                 if (signal_pending(current)) {
738                         len = sock_intr_errno(timeo);
739                         break;
740                 }
741
742                 sk_wait_data(sk, &timeo);
743                 continue;
744         found_ok_skb:
745                 if (len > skb->len)
746                         len = skb->len;
747                 else if (len < skb->len)
748                         msg->msg_flags |= MSG_TRUNC;
749
750                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
751                         /* Exception. Bailout! */
752                         len = -EFAULT;
753                         break;
754                 }
755         found_fin_ok:
756                 if (!(flags & MSG_PEEK))
757                         sk_eat_skb(sk, skb, 0);
758                 break;
759         } while (1);
760 out:
761         release_sock(sk);
762         return len;
763 }
764
765 EXPORT_SYMBOL_GPL(dccp_recvmsg);
766
767 int inet_dccp_listen(struct socket *sock, int backlog)
768 {
769         struct sock *sk = sock->sk;
770         unsigned char old_state;
771         int err;
772
773         lock_sock(sk);
774
775         err = -EINVAL;
776         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
777                 goto out;
778
779         old_state = sk->sk_state;
780         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
781                 goto out;
782
783         /* Really, if the socket is already in listen state
784          * we can only allow the backlog to be adjusted.
785          */
786         if (old_state != DCCP_LISTEN) {
787                 /*
788                  * FIXME: here it probably should be sk->sk_prot->listen_start
789                  * see tcp_listen_start
790                  */
791                 err = dccp_listen_start(sk, backlog);
792                 if (err)
793                         goto out;
794         }
795         sk->sk_max_ack_backlog = backlog;
796         err = 0;
797
798 out:
799         release_sock(sk);
800         return err;
801 }
802
803 EXPORT_SYMBOL_GPL(inet_dccp_listen);
804
805 static const unsigned char dccp_new_state[] = {
806         /* current state:   new state:      action:     */
807         [0]               = DCCP_CLOSED,
808         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
809         [DCCP_REQUESTING] = DCCP_CLOSED,
810         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
811         [DCCP_LISTEN]     = DCCP_CLOSED,
812         [DCCP_RESPOND]    = DCCP_CLOSED,
813         [DCCP_CLOSING]    = DCCP_CLOSED,
814         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
815         [DCCP_CLOSED]     = DCCP_CLOSED,
816 };
817
818 static int dccp_close_state(struct sock *sk)
819 {
820         const int next = dccp_new_state[sk->sk_state];
821         const int ns = next & DCCP_STATE_MASK;
822
823         if (ns != sk->sk_state)
824                 dccp_set_state(sk, ns);
825
826         return next & DCCP_ACTION_FIN;
827 }
828
829 void dccp_close(struct sock *sk, long timeout)
830 {
831         struct dccp_sock *dp = dccp_sk(sk);
832         struct sk_buff *skb;
833         int state;
834
835         lock_sock(sk);
836
837         sk->sk_shutdown = SHUTDOWN_MASK;
838
839         if (sk->sk_state == DCCP_LISTEN) {
840                 dccp_set_state(sk, DCCP_CLOSED);
841
842                 /* Special case. */
843                 inet_csk_listen_stop(sk);
844
845                 goto adjudge_to_death;
846         }
847
848         sk_stop_timer(sk, &dp->dccps_xmit_timer);
849
850         /*
851          * We need to flush the recv. buffs.  We do this only on the
852          * descriptor close, not protocol-sourced closes, because the
853           *reader process may not have drained the data yet!
854          */
855         /* FIXME: check for unread data */
856         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
857                 __kfree_skb(skb);
858         }
859
860         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
861                 /* Check zero linger _after_ checking for unread data. */
862                 sk->sk_prot->disconnect(sk, 0);
863         } else if (dccp_close_state(sk)) {
864                 dccp_send_close(sk, 1);
865         }
866
867         sk_stream_wait_close(sk, timeout);
868
869 adjudge_to_death:
870         state = sk->sk_state;
871         sock_hold(sk);
872         sock_orphan(sk);
873         atomic_inc(sk->sk_prot->orphan_count);
874
875         /*
876          * It is the last release_sock in its life. It will remove backlog.
877          */
878         release_sock(sk);
879         /*
880          * Now socket is owned by kernel and we acquire BH lock
881          * to finish close. No need to check for user refs.
882          */
883         local_bh_disable();
884         bh_lock_sock(sk);
885         BUG_TRAP(!sock_owned_by_user(sk));
886
887         /* Have we already been destroyed by a softirq or backlog? */
888         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
889                 goto out;
890
891         /*
892          * The last release_sock may have processed the CLOSE or RESET
893          * packet moving sock to CLOSED state, if not we have to fire
894          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
895          * in draft-ietf-dccp-spec-11. -acme
896          */
897         if (sk->sk_state == DCCP_CLOSING) {
898                 /* FIXME: should start at 2 * RTT */
899                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
900                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
901                                           inet_csk(sk)->icsk_rto,
902                                           DCCP_RTO_MAX);
903 #if 0
904                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
905                 dccp_set_state(sk, DCCP_CLOSED);
906 #endif
907         }
908
909         if (sk->sk_state == DCCP_CLOSED)
910                 inet_csk_destroy_sock(sk);
911
912         /* Otherwise, socket is reprieved until protocol close. */
913
914 out:
915         bh_unlock_sock(sk);
916         local_bh_enable();
917         sock_put(sk);
918 }
919
/* Export dccp_close() so that other GPL-licensed modules can link to it. */
EXPORT_SYMBOL_GPL(dccp_close);
921
/*
 * dccp_shutdown - DCCP shutdown entry point.
 * @sk:  socket being shut down (currently unused)
 * @how: shutdown mode requested by the caller (currently unused)
 *
 * This is a stub: the only thing it does is emit a debug trace.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

/* Exported for use by other GPL-licensed modules. */
EXPORT_SYMBOL_GPL(dccp_shutdown);
928
929 static int __init dccp_mib_init(void)
930 {
931         int rc = -ENOMEM;
932
933         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
934         if (dccp_statistics[0] == NULL)
935                 goto out;
936
937         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
938         if (dccp_statistics[1] == NULL)
939                 goto out_free_one;
940
941         rc = 0;
942 out:
943         return rc;
944 out_free_one:
945         free_percpu(dccp_statistics[0]);
946         dccp_statistics[0] = NULL;
947         goto out;
948
949 }
950
951 static void dccp_mib_exit(void)
952 {
953         free_percpu(dccp_statistics[0]);
954         free_percpu(dccp_statistics[1]);
955         dccp_statistics[0] = dccp_statistics[1] = NULL;
956 }
957
/*
 * Optional override for the size of the established hash table.  When
 * non-zero, dccp_init() derives its page goal from this bucket count
 * instead of from num_physpages.  Mode 0444 makes the parameter
 * read-only once the module is loaded.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug message switch; only built when CONFIG_IP_DCCP_DEBUG is set.
 * Exported so that other GPL-licensed modules can reference it.
 */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
969
970 static int __init dccp_init(void)
971 {
972         unsigned long goal;
973         int ehash_order, bhash_order, i;
974         int rc = -ENOBUFS;
975
976         dccp_hashinfo.bind_bucket_cachep =
977                 kmem_cache_create("dccp_bind_bucket",
978                                   sizeof(struct inet_bind_bucket), 0,
979                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
980         if (!dccp_hashinfo.bind_bucket_cachep)
981                 goto out;
982
983         /*
984          * Size and allocate the main established and bind bucket
985          * hash tables.
986          *
987          * The methodology is similar to that of the buffer cache.
988          */
989         if (num_physpages >= (128 * 1024))
990                 goal = num_physpages >> (21 - PAGE_SHIFT);
991         else
992                 goal = num_physpages >> (23 - PAGE_SHIFT);
993
994         if (thash_entries)
995                 goal = (thash_entries *
996                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
997         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
998                 ;
999         do {
1000                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1001                                         sizeof(struct inet_ehash_bucket);
1002                 dccp_hashinfo.ehash_size >>= 1;
1003                 while (dccp_hashinfo.ehash_size &
1004                        (dccp_hashinfo.ehash_size - 1))
1005                         dccp_hashinfo.ehash_size--;
1006                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1007                         __get_free_pages(GFP_ATOMIC, ehash_order);
1008         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1009
1010         if (!dccp_hashinfo.ehash) {
1011                 printk(KERN_CRIT "Failed to allocate DCCP "
1012                                  "established hash table\n");
1013                 goto out_free_bind_bucket_cachep;
1014         }
1015
1016         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1017                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1018                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1019         }
1020
1021         bhash_order = ehash_order;
1022
1023         do {
1024                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1025                                         sizeof(struct inet_bind_hashbucket);
1026                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1027                     bhash_order > 0)
1028                         continue;
1029                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1030                         __get_free_pages(GFP_ATOMIC, bhash_order);
1031         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1032
1033         if (!dccp_hashinfo.bhash) {
1034                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1035                 goto out_free_dccp_ehash;
1036         }
1037
1038         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1039                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1040                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1041         }
1042
1043         rc = dccp_mib_init();
1044         if (rc)
1045                 goto out_free_dccp_bhash;
1046
1047         rc = dccp_ackvec_init();
1048         if (rc)
1049                 goto out_free_dccp_mib;
1050
1051         rc = dccp_sysctl_init();
1052         if (rc)
1053                 goto out_ackvec_exit;
1054 out:
1055         return rc;
1056 out_ackvec_exit:
1057         dccp_ackvec_exit();
1058 out_free_dccp_mib:
1059         dccp_mib_exit();
1060 out_free_dccp_bhash:
1061         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1062         dccp_hashinfo.bhash = NULL;
1063 out_free_dccp_ehash:
1064         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1065         dccp_hashinfo.ehash = NULL;
1066 out_free_bind_bucket_cachep:
1067         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1068         dccp_hashinfo.bind_bucket_cachep = NULL;
1069         goto out;
1070 }
1071
1072 static void __exit dccp_fini(void)
1073 {
1074         dccp_mib_exit();
1075         free_pages((unsigned long)dccp_hashinfo.bhash,
1076                    get_order(dccp_hashinfo.bhash_size *
1077                              sizeof(struct inet_bind_hashbucket)));
1078         free_pages((unsigned long)dccp_hashinfo.ehash,
1079                    get_order(dccp_hashinfo.ehash_size *
1080                              sizeof(struct inet_ehash_bucket)));
1081         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1082         dccp_ackvec_exit();
1083         dccp_sysctl_exit();
1084 }
1085
/* Register dccp_init()/dccp_fini() as the module's load and unload hooks. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");