[DCCP] feat: Introduce sysctls for the default features
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37
38 #include "ccid.h"
39 #include "dccp.h"
40 #include "feat.h"
41
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
44 EXPORT_SYMBOL_GPL(dccp_statistics);
45
46 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
47
48 EXPORT_SYMBOL_GPL(dccp_orphan_count);
49
/*
 * IPv4 protocol descriptor for DCCP.  dccp_init() registers it for
 * IPPROTO_DCCP with inet_add_protocol(); .handler and .err_handler point
 * at the DCCPv4 entry points dccp_v4_rcv and dccp_v4_err.
 */
static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
};
55
56 const char *dccp_packet_name(const int type)
57 {
58         static const char *dccp_packet_names[] = {
59                 [DCCP_PKT_REQUEST]  = "REQUEST",
60                 [DCCP_PKT_RESPONSE] = "RESPONSE",
61                 [DCCP_PKT_DATA]     = "DATA",
62                 [DCCP_PKT_ACK]      = "ACK",
63                 [DCCP_PKT_DATAACK]  = "DATAACK",
64                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65                 [DCCP_PKT_CLOSE]    = "CLOSE",
66                 [DCCP_PKT_RESET]    = "RESET",
67                 [DCCP_PKT_SYNC]     = "SYNC",
68                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
69         };
70
71         if (type >= DCCP_NR_PKT_TYPES)
72                 return "INVALID";
73         else
74                 return dccp_packet_names[type];
75 }
76
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
78
79 const char *dccp_state_name(const int state)
80 {
81         static char *dccp_state_names[] = {
82         [DCCP_OPEN]       = "OPEN",
83         [DCCP_REQUESTING] = "REQUESTING",
84         [DCCP_PARTOPEN]   = "PARTOPEN",
85         [DCCP_LISTEN]     = "LISTEN",
86         [DCCP_RESPOND]    = "RESPOND",
87         [DCCP_CLOSING]    = "CLOSING",
88         [DCCP_TIME_WAIT]  = "TIME_WAIT",
89         [DCCP_CLOSED]     = "CLOSED",
90         };
91
92         if (state >= DCCP_MAX_STATES)
93                 return "INVALID STATE!";
94         else
95                 return dccp_state_names[state];
96 }
97
98 EXPORT_SYMBOL_GPL(dccp_state_name);
99
100 static inline int dccp_listen_start(struct sock *sk)
101 {
102         struct dccp_sock *dp = dccp_sk(sk);
103
104         dp->dccps_role = DCCP_ROLE_LISTEN;
105         /*
106          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
107          * before calling listen()
108          */
109         if (dccp_service_not_initialized(sk))
110                 return -EPROTO;
111         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
112 }
113
114 int dccp_disconnect(struct sock *sk, int flags)
115 {
116         struct inet_connection_sock *icsk = inet_csk(sk);
117         struct inet_sock *inet = inet_sk(sk);
118         int err = 0;
119         const int old_state = sk->sk_state;
120
121         if (old_state != DCCP_CLOSED)
122                 dccp_set_state(sk, DCCP_CLOSED);
123
124         /* ABORT function of RFC793 */
125         if (old_state == DCCP_LISTEN) {
126                 inet_csk_listen_stop(sk);
127         /* FIXME: do the active reset thing */
128         } else if (old_state == DCCP_REQUESTING)
129                 sk->sk_err = ECONNRESET;
130
131         dccp_clear_xmit_timers(sk);
132         __skb_queue_purge(&sk->sk_receive_queue);
133         if (sk->sk_send_head != NULL) {
134                 __kfree_skb(sk->sk_send_head);
135                 sk->sk_send_head = NULL;
136         }
137
138         inet->dport = 0;
139
140         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
141                 inet_reset_saddr(sk);
142
143         sk->sk_shutdown = 0;
144         sock_reset_flag(sk, SOCK_DONE);
145
146         icsk->icsk_backoff = 0;
147         inet_csk_delack_init(sk);
148         __sk_dst_reset(sk);
149
150         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
151
152         sk->sk_error_report(sk);
153         return err;
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_disconnect);
157
158 /*
159  *      Wait for a DCCP event.
160  *
161  *      Note that we don't need to lock the socket, as the upper poll layers
162  *      take care of normal races (between the test and the event) and we don't
163  *      go look at any of the socket buffers directly.
164  */
/*
 * dccp_poll - compute the poll() event mask for a DCCP socket
 * @file: file the poll was issued on
 * @sock: socket being polled
 * @wait: poll table to register the socket's wait queue with
 *
 * Listening sockets are delegated to inet_csk_listen_poll().  For all
 * other sockets the mask is built from sk_err, sk_shutdown, the socket
 * state and the receive/send buffer accounting.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by the poll logic, and correct handling of state changes
           made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Fully shut down or closed: report hang-up. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        /* Receive side shut down: report readable. */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? (i.e. any state other than REQUESTING or RESPOND) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable when receive memory is charged to the socket. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
213
214 EXPORT_SYMBOL_GPL(dccp_poll);
215
/*
 * dccp_ioctl - protocol-level ioctl hook
 *
 * No DCCP-specific ioctls are implemented: this only emits a debug
 * message and returns -ENOIOCTLCMD for every @cmd.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}
221
222 EXPORT_SYMBOL_GPL(dccp_ioctl);
223
224 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
225                                    char __user *optval, int optlen)
226 {
227         struct dccp_sock *dp = dccp_sk(sk);
228         struct dccp_service_list *sl = NULL;
229
230         if (service == DCCP_SERVICE_INVALID_VALUE || 
231             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
232                 return -EINVAL;
233
234         if (optlen > sizeof(service)) {
235                 sl = kmalloc(optlen, GFP_KERNEL);
236                 if (sl == NULL)
237                         return -ENOMEM;
238
239                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
240                 if (copy_from_user(sl->dccpsl_list,
241                                    optval + sizeof(service),
242                                    optlen - sizeof(service)) ||
243                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
244                         kfree(sl);
245                         return -EFAULT;
246                 }
247         }
248
249         lock_sock(sk);
250         dp->dccps_service = service;
251
252         kfree(dp->dccps_service_list);
253
254         dp->dccps_service_list = sl;
255         release_sock(sk);
256         return 0;
257 }
258
259 /* byte 1 is feature.  the rest is the preference list */
260 static int dccp_setsockopt_change(struct sock *sk, int type,
261                                   struct dccp_so_feat __user *optval)
262 {
263         struct dccp_so_feat opt;
264         u8 *val;
265         int rc;
266
267         if (copy_from_user(&opt, optval, sizeof(opt)))
268                 return -EFAULT;
269
270         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
271         if (!val)
272                 return -ENOMEM;
273
274         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
275                 rc = -EFAULT;
276                 goto out_free_val;
277         }
278
279         rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
280                               GFP_KERNEL);
281         if (rc)
282                 goto out_free_val;
283
284 out:
285         return rc;
286
287 out_free_val:
288         kfree(val);
289         goto out;
290 }
291
292 int dccp_setsockopt(struct sock *sk, int level, int optname,
293                     char __user *optval, int optlen)
294 {
295         struct dccp_sock *dp;
296         int err;
297         int val;
298
299         if (level != SOL_DCCP)
300                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
301                                                              optname, optval,
302                                                              optlen);
303
304         if (optlen < sizeof(int))
305                 return -EINVAL;
306
307         if (get_user(val, (int __user *)optval))
308                 return -EFAULT;
309
310         if (optname == DCCP_SOCKOPT_SERVICE)
311                 return dccp_setsockopt_service(sk, val, optval, optlen);
312
313         lock_sock(sk);
314         dp = dccp_sk(sk);
315         err = 0;
316
317         switch (optname) {
318         case DCCP_SOCKOPT_PACKET_SIZE:
319                 dp->dccps_packet_size = val;
320                 break;
321
322         case DCCP_SOCKOPT_CHANGE_L:
323                 if (optlen != sizeof(struct dccp_so_feat))
324                         err = -EINVAL;
325                 else
326                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
327                                                      (struct dccp_so_feat *)
328                                                      optval);
329                 break;
330
331         case DCCP_SOCKOPT_CHANGE_R:
332                 if (optlen != sizeof(struct dccp_so_feat))
333                         err = -EINVAL;
334                 else
335                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
336                                                      (struct dccp_so_feat *)
337                                                      optval);
338                 break;
339
340         default:
341                 err = -ENOPROTOOPT;
342                 break;
343         }
344         
345         release_sock(sk);
346         return err;
347 }
348
349 EXPORT_SYMBOL_GPL(dccp_setsockopt);
350
351 static int dccp_getsockopt_service(struct sock *sk, int len,
352                                    __be32 __user *optval,
353                                    int __user *optlen)
354 {
355         const struct dccp_sock *dp = dccp_sk(sk);
356         const struct dccp_service_list *sl;
357         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
358
359         lock_sock(sk);
360         if (dccp_service_not_initialized(sk))
361                 goto out;
362
363         if ((sl = dp->dccps_service_list) != NULL) {
364                 slen = sl->dccpsl_nr * sizeof(u32);
365                 total_len += slen;
366         }
367
368         err = -EINVAL;
369         if (total_len > len)
370                 goto out;
371
372         err = 0;
373         if (put_user(total_len, optlen) ||
374             put_user(dp->dccps_service, optval) ||
375             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
376                 err = -EFAULT;
377 out:
378         release_sock(sk);
379         return err;
380 }
381
382 int dccp_getsockopt(struct sock *sk, int level, int optname,
383                     char __user *optval, int __user *optlen)
384 {
385         struct dccp_sock *dp;
386         int val, len;
387
388         if (level != SOL_DCCP)
389                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
390                                                              optname, optval,
391                                                              optlen);
392         if (get_user(len, optlen))
393                 return -EFAULT;
394
395         if (len < sizeof(int))
396                 return -EINVAL;
397
398         dp = dccp_sk(sk);
399
400         switch (optname) {
401         case DCCP_SOCKOPT_PACKET_SIZE:
402                 val = dp->dccps_packet_size;
403                 len = sizeof(dp->dccps_packet_size);
404                 break;
405         case DCCP_SOCKOPT_SERVICE:
406                 return dccp_getsockopt_service(sk, len,
407                                                (__be32 __user *)optval, optlen);
408         case 128 ... 191:
409                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
410                                              len, (u32 __user *)optval, optlen);
411         case 192 ... 255:
412                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
413                                              len, (u32 __user *)optval, optlen);
414         default:
415                 return -ENOPROTOOPT;
416         }
417
418         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
419                 return -EFAULT;
420
421         return 0;
422 }
423
424 EXPORT_SYMBOL_GPL(dccp_getsockopt);
425
/*
 * dccp_sendmsg - send one datagram on a DCCP socket
 * @iocb: not used here
 * @sk: socket to send on
 * @msg: message header; msg_iov supplies the payload, msg_flags the flags
 * @len: payload length in bytes
 *
 * The whole payload goes into a single skb, so @len may not exceed the
 * cached MSS (-EMSGSIZE otherwise).  Returns @len on success or the
 * error code produced by the failing step.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* The socket lock is dropped around the skb allocation. */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        /* Leave headroom for the headers, then copy in the user payload. */
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     Current plan however is to _use_ sk_write_queue with
         *     an algorithm similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
out_release:
        release_sock(sk);
        /* GNU "?:" shorthand: rc if non-zero, otherwise len. */
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}
481
482 EXPORT_SYMBOL_GPL(dccp_sendmsg);
483
/*
 * dccp_recvmsg - receive one datagram from a DCCP socket
 * @iocb: not used here
 * @sk: socket to read from
 * @msg: message header; msg_iov receives the payload, msg_flags may get
 *       MSG_TRUNC set
 * @len: size of the caller's buffer
 * @nonblock: non-zero for a non-blocking read
 * @flags: MSG_* flags; MSG_PEEK leaves the packet on the queue
 * @addr_len: not used here
 *
 * Looks at the head of the receive queue: DATA/DATAACK packets are copied
 * to the caller, RESET/CLOSE packets end the read with 0, and any other
 * packet type is dropped.  With an empty queue the socket status decides
 * between returning and sleeping in sk_wait_data().
 *
 * Returns the number of bytes copied, 0 at end of connection, or a
 * negative error code.  Note that the result is carried in @len.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                /* Anything else carries nothing for the reader: drop it. */
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                /* Nothing deliverable: decide whether to return or wait. */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Non-blocking read, or the receive timeout has expired. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Clamp to the packet size; flag truncation if it won't fit. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* Consume the packet unless the caller is only peeking. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
580
581 EXPORT_SYMBOL_GPL(dccp_recvmsg);
582
583 int inet_dccp_listen(struct socket *sock, int backlog)
584 {
585         struct sock *sk = sock->sk;
586         unsigned char old_state;
587         int err;
588
589         lock_sock(sk);
590
591         err = -EINVAL;
592         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
593                 goto out;
594
595         old_state = sk->sk_state;
596         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
597                 goto out;
598
599         /* Really, if the socket is already in listen state
600          * we can only allow the backlog to be adjusted.
601          */
602         if (old_state != DCCP_LISTEN) {
603                 /*
604                  * FIXME: here it probably should be sk->sk_prot->listen_start
605                  * see tcp_listen_start
606                  */
607                 err = dccp_listen_start(sk);
608                 if (err)
609                         goto out;
610         }
611         sk->sk_max_ack_backlog = backlog;
612         err = 0;
613
614 out:
615         release_sock(sk);
616         return err;
617 }
618
619 EXPORT_SYMBOL_GPL(inet_dccp_listen);
620
/*
 * State transition table used when a socket is closed, indexed by the
 * current socket state.  Each entry is the state to move to, optionally
 * OR-ed with DCCP_ACTION_FIN; dccp_close_state() separates the two parts
 * with DCCP_STATE_MASK.
 */
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};
633
634 static int dccp_close_state(struct sock *sk)
635 {
636         const int next = dccp_new_state[sk->sk_state];
637         const int ns = next & DCCP_STATE_MASK;
638
639         if (ns != sk->sk_state)
640                 dccp_set_state(sk, ns);
641
642         return next & DCCP_ACTION_FIN;
643 }
644
/*
 * dccp_close - close a DCCP socket
 * @sk: socket being closed
 * @timeout: passed on to sk_stream_wait_close()
 *
 * Marks the socket fully shut down, stops a listener or flushes the
 * receive queue and starts the close handshake, then orphans the socket.
 * If the socket is already in DCCP_CLOSED at that point it is destroyed
 * here; otherwise it stays around until the protocol finishes the close.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                /* The transition table asked for the FIN action. */
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Hold a reference across the orphaning; dropped by sock_put() below. */
        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}
725
726 EXPORT_SYMBOL_GPL(dccp_close);
727
/*
 * dccp_shutdown - protocol-level shutdown hook
 *
 * Currently a stub: it only emits a debug message; @sk and @how are
 * not acted upon.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}
732
733 EXPORT_SYMBOL_GPL(dccp_shutdown);
734
/*
 * Socket-layer operations for PF_INET DCCP sockets, referenced from
 * dccp_v4_protosw.  Most entries are the generic inet_ and sock_ helpers;
 * only poll and listen use DCCP-specific functions from this file.
 */
static const struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};
757
758 extern struct net_proto_family inet_family_ops;
759
/*
 * Protocol switch entry tying (SOCK_DCCP, IPPROTO_DCCP) to dccp_prot and
 * inet_dccp_ops.  Registered by dccp_init() with inet_register_protosw()
 * and removed again by dccp_fini().
 */
static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,
        .no_check       = 0,
        .flags          = INET_PROTOSW_ICSK,
};
769
770 /*
771  * This is the global socket data structure used for responding to
772  * the Out-of-the-blue (OOTB) packets. A control sock will be created
773  * for this socket at the initialization time.
774  */
775 struct socket *dccp_ctl_socket;
776
777 static char dccp_ctl_socket_err_msg[] __initdata =
778         KERN_ERR "DCCP: Failed to create the control socket.\n";
779
780 static int __init dccp_ctl_sock_init(void)
781 {
782         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
783                                   &dccp_ctl_socket);
784         if (rc < 0)
785                 printk(dccp_ctl_socket_err_msg);
786         else {
787                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
788                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
789
790                 /* Unhash it so that IP input processing does not even
791                  * see it, we do not wish this socket to see incoming
792                  * packets.
793                  */
794                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
795         }
796
797         return rc;
798 }
799
800 #ifdef CONFIG_IP_DCCP_UNLOAD_HACK
801 void dccp_ctl_sock_exit(void)
802 {
803         if (dccp_ctl_socket != NULL) {
804                 sock_release(dccp_ctl_socket);
805                 dccp_ctl_socket = NULL;
806         }
807 }
808
809 EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
810 #endif
811
812 static int __init init_dccp_v4_mibs(void)
813 {
814         int rc = -ENOMEM;
815
816         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
817         if (dccp_statistics[0] == NULL)
818                 goto out;
819
820         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
821         if (dccp_statistics[1] == NULL)
822                 goto out_free_one;
823
824         rc = 0;
825 out:
826         return rc;
827 out_free_one:
828         free_percpu(dccp_statistics[0]);
829         dccp_statistics[0] = NULL;
830         goto out;
831
832 }
833
/*
 * Read-only (0444) module parameter.  When non-zero, dccp_init() sizes
 * the established hash table from it instead of from the amount of
 * physical memory.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
837
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, only built with CONFIG_IP_DCCP_DEBUG.  Exposed as
 * a read-only (0444) module parameter and exported (GPL-only).
 */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
845
/*
 * dccp_init - module initialisation
 *
 * Registers dccp_prot, creates the bind-bucket cache, allocates and
 * initialises the established and bind hash tables, allocates the MIBs,
 * registers the protocol handler and protosw entry, and finally brings up
 * the ack-vector code, the sysctls and the control socket.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * through the chain of labels at the end (in reverse order of setup) and
 * the error code of the failing step is returned.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_prot, 1);

        if (rc)
                goto out;

        rc = -ENOBUFS;
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        /* An explicit thash_entries parameter overrides the heuristic. */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        /* Smallest page order whose page count reaches the goal. */
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                /*
                 * ehash_size is half the number of buckets that fit in the
                 * allocation, rounded down to a power of two; the init
                 * loop below walks ehash_size << 1 buckets.
                 */
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        /* Start the bind hash at the order that worked for the ehash. */
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                /*
                 * More than 64K buckets: skip the allocation; "continue"
                 * jumps to the loop condition, which lowers the order.
                 */
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = init_dccp_v4_mibs();
        if (rc)
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ackvec_init();
        if (rc)
                goto out_unregister_protosw;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_sysctl_exit;
out:
        return rc;
        /* Error unwinding: each label undoes one setup step, newest first. */
out_sysctl_exit:
        dccp_sysctl_exit();
out_ackvec_exit:
        dccp_ackvec_exit();
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_prot);
        goto out;
}
970
971 static const char dccp_del_proto_err_msg[] __exitdata =
972         KERN_ERR "can't remove dccp net_protocol\n";
973
974 static void __exit dccp_fini(void)
975 {
976         inet_unregister_protosw(&dccp_v4_protosw);
977
978         if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
979                 printk(dccp_del_proto_err_msg);
980
981         free_percpu(dccp_statistics[0]);
982         free_percpu(dccp_statistics[1]);
983         free_pages((unsigned long)dccp_hashinfo.bhash,
984                    get_order(dccp_hashinfo.bhash_size *
985                              sizeof(struct inet_bind_hashbucket)));
986         free_pages((unsigned long)dccp_hashinfo.ehash,
987                    get_order(dccp_hashinfo.ehash_size *
988                              sizeof(struct inet_ehash_bucket)));
989         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
990         proto_unregister(&dccp_prot);
991         dccp_ackvec_exit();
992         dccp_sysctl_exit();
993 }
994
995 module_init(dccp_init);
996 module_exit(dccp_fini);
997
/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
1003 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1004 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
1005 MODULE_LICENSE("GPL");
1006 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1007 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");