net: Use a percpu_counter for orphan_count
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* SNMP-style statistics block of type struct dccp_mib, exported to GPL modules. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Per-CPU counter; by its name it presumably tracks orphaned DCCP sockets
 * (it is not updated in this part of the file -- confirm against its users).
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Hash table bookkeeping shared by the DCCP protocol code (exported). */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * dccp_sendmsg() returns -EAGAIN once sk_write_queue holds this many packets.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
51
52 void dccp_set_state(struct sock *sk, const int state)
53 {
54         const int oldstate = sk->sk_state;
55
56         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
57                       dccp_state_name(oldstate), dccp_state_name(state));
58         WARN_ON(state == oldstate);
59
60         switch (state) {
61         case DCCP_OPEN:
62                 if (oldstate != DCCP_OPEN)
63                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
64                 break;
65
66         case DCCP_CLOSED:
67                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
68                     oldstate == DCCP_CLOSING)
69                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
70
71                 sk->sk_prot->unhash(sk);
72                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
73                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
74                         inet_put_port(sk);
75                 /* fall through */
76         default:
77                 if (oldstate == DCCP_OPEN)
78                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
79         }
80
81         /* Change state AFTER socket is unhashed to avoid closed
82          * socket sitting in hash tables.
83          */
84         sk->sk_state = state;
85 }
86
87 EXPORT_SYMBOL_GPL(dccp_set_state);
88
89 static void dccp_finish_passive_close(struct sock *sk)
90 {
91         switch (sk->sk_state) {
92         case DCCP_PASSIVE_CLOSE:
93                 /* Node (client or server) has received Close packet. */
94                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
95                 dccp_set_state(sk, DCCP_CLOSED);
96                 break;
97         case DCCP_PASSIVE_CLOSEREQ:
98                 /*
99                  * Client received CloseReq. We set the `active' flag so that
100                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
101                  */
102                 dccp_send_close(sk, 1);
103                 dccp_set_state(sk, DCCP_CLOSING);
104         }
105 }
106
107 void dccp_done(struct sock *sk)
108 {
109         dccp_set_state(sk, DCCP_CLOSED);
110         dccp_clear_xmit_timers(sk);
111
112         sk->sk_shutdown = SHUTDOWN_MASK;
113
114         if (!sock_flag(sk, SOCK_DEAD))
115                 sk->sk_state_change(sk);
116         else
117                 inet_csk_destroy_sock(sk);
118 }
119
120 EXPORT_SYMBOL_GPL(dccp_done);
121
122 const char *dccp_packet_name(const int type)
123 {
124         static const char *dccp_packet_names[] = {
125                 [DCCP_PKT_REQUEST]  = "REQUEST",
126                 [DCCP_PKT_RESPONSE] = "RESPONSE",
127                 [DCCP_PKT_DATA]     = "DATA",
128                 [DCCP_PKT_ACK]      = "ACK",
129                 [DCCP_PKT_DATAACK]  = "DATAACK",
130                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
131                 [DCCP_PKT_CLOSE]    = "CLOSE",
132                 [DCCP_PKT_RESET]    = "RESET",
133                 [DCCP_PKT_SYNC]     = "SYNC",
134                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
135         };
136
137         if (type >= DCCP_NR_PKT_TYPES)
138                 return "INVALID";
139         else
140                 return dccp_packet_names[type];
141 }
142
143 EXPORT_SYMBOL_GPL(dccp_packet_name);
144
145 const char *dccp_state_name(const int state)
146 {
147         static char *dccp_state_names[] = {
148         [DCCP_OPEN]             = "OPEN",
149         [DCCP_REQUESTING]       = "REQUESTING",
150         [DCCP_PARTOPEN]         = "PARTOPEN",
151         [DCCP_LISTEN]           = "LISTEN",
152         [DCCP_RESPOND]          = "RESPOND",
153         [DCCP_CLOSING]          = "CLOSING",
154         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
155         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
156         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
157         [DCCP_TIME_WAIT]        = "TIME_WAIT",
158         [DCCP_CLOSED]           = "CLOSED",
159         };
160
161         if (state >= DCCP_MAX_STATES)
162                 return "INVALID STATE!";
163         else
164                 return dccp_state_names[state];
165 }
166
167 EXPORT_SYMBOL_GPL(dccp_state_name);
168
169 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
170 {
171         struct dccp_sock *dp = dccp_sk(sk);
172         struct dccp_minisock *dmsk = dccp_msk(sk);
173         struct inet_connection_sock *icsk = inet_csk(sk);
174
175         dccp_minisock_init(&dp->dccps_minisock);
176
177         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
178         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
179         sk->sk_state            = DCCP_CLOSED;
180         sk->sk_write_space      = dccp_write_space;
181         icsk->icsk_sync_mss     = dccp_sync_mss;
182         dp->dccps_mss_cache     = 536;
183         dp->dccps_rate_last     = jiffies;
184         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
185         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
186         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
187
188         dccp_init_xmit_timers(sk);
189
190         INIT_LIST_HEAD(&dp->dccps_featneg);
191         /*
192          * FIXME: We're hardcoding the CCID, and doing this at this point makes
193          * the listening (master) sock get CCID control blocks, which is not
194          * necessary, but for now, to not mess with the test userspace apps,
195          * lets leave it here, later the real solution is to do this in a
196          * setsockopt(CCIDs-I-want/accept). -acme
197          */
198         if (likely(ctl_sock_initialized)) {
199                 int rc = dccp_feat_init(sk);
200
201                 if (rc)
202                         return rc;
203
204                 if (dmsk->dccpms_send_ack_vector) {
205                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
206                         if (dp->dccps_hc_rx_ackvec == NULL)
207                                 return -ENOMEM;
208                 }
209                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
210                                                       sk, GFP_KERNEL);
211                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
212                                                       sk, GFP_KERNEL);
213                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
214                              dp->dccps_hc_tx_ccid == NULL)) {
215                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
216                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
217                         if (dmsk->dccpms_send_ack_vector) {
218                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
219                                 dp->dccps_hc_rx_ackvec = NULL;
220                         }
221                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
222                         return -ENOMEM;
223                 }
224         } else {
225                 /* control socket doesn't need feat nego */
226                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
227                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
228         }
229
230         return 0;
231 }
232
233 EXPORT_SYMBOL_GPL(dccp_init_sock);
234
235 void dccp_destroy_sock(struct sock *sk)
236 {
237         struct dccp_sock *dp = dccp_sk(sk);
238         struct dccp_minisock *dmsk = dccp_msk(sk);
239
240         /*
241          * DCCP doesn't use sk_write_queue, just sk_send_head
242          * for retransmissions
243          */
244         if (sk->sk_send_head != NULL) {
245                 kfree_skb(sk->sk_send_head);
246                 sk->sk_send_head = NULL;
247         }
248
249         /* Clean up a referenced DCCP bind bucket. */
250         if (inet_csk(sk)->icsk_bind_hash != NULL)
251                 inet_put_port(sk);
252
253         kfree(dp->dccps_service_list);
254         dp->dccps_service_list = NULL;
255
256         if (dmsk->dccpms_send_ack_vector) {
257                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
258                 dp->dccps_hc_rx_ackvec = NULL;
259         }
260         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
261         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
262         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
263
264         /* clean up feature negotiation state */
265         dccp_feat_list_purge(&dp->dccps_featneg);
266 }
267
268 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
269
270 static inline int dccp_listen_start(struct sock *sk, int backlog)
271 {
272         struct dccp_sock *dp = dccp_sk(sk);
273
274         dp->dccps_role = DCCP_ROLE_LISTEN;
275         /* do not start to listen if feature negotiation setup fails */
276         if (dccp_feat_finalise_settings(dp))
277                 return -EPROTO;
278         return inet_csk_listen_start(sk, backlog);
279 }
280
281 static inline int dccp_need_reset(int state)
282 {
283         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
284                state != DCCP_REQUESTING;
285 }
286
287 int dccp_disconnect(struct sock *sk, int flags)
288 {
289         struct inet_connection_sock *icsk = inet_csk(sk);
290         struct inet_sock *inet = inet_sk(sk);
291         int err = 0;
292         const int old_state = sk->sk_state;
293
294         if (old_state != DCCP_CLOSED)
295                 dccp_set_state(sk, DCCP_CLOSED);
296
297         /*
298          * This corresponds to the ABORT function of RFC793, sec. 3.8
299          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
300          */
301         if (old_state == DCCP_LISTEN) {
302                 inet_csk_listen_stop(sk);
303         } else if (dccp_need_reset(old_state)) {
304                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
305                 sk->sk_err = ECONNRESET;
306         } else if (old_state == DCCP_REQUESTING)
307                 sk->sk_err = ECONNRESET;
308
309         dccp_clear_xmit_timers(sk);
310
311         __skb_queue_purge(&sk->sk_receive_queue);
312         __skb_queue_purge(&sk->sk_write_queue);
313         if (sk->sk_send_head != NULL) {
314                 __kfree_skb(sk->sk_send_head);
315                 sk->sk_send_head = NULL;
316         }
317
318         inet->dport = 0;
319
320         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
321                 inet_reset_saddr(sk);
322
323         sk->sk_shutdown = 0;
324         sock_reset_flag(sk, SOCK_DONE);
325
326         icsk->icsk_backoff = 0;
327         inet_csk_delack_init(sk);
328         __sk_dst_reset(sk);
329
330         WARN_ON(inet->num && !icsk->icsk_bind_hash);
331
332         sk->sk_error_report(sk);
333         return err;
334 }
335
336 EXPORT_SYMBOL_GPL(dccp_disconnect);
337
/*
 *      Wait for a DCCP event.
 *
 *      @file: file being polled, passed on to poll_wait()
 *      @sock: socket whose sock->sk is examined
 *      @wait: poll table, passed on to poll_wait()
 *
 *      Returns the POLL* event mask for the socket. Listening sockets are
 *      delegated to inet_csk_listen_poll().
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Hang-up: both directions shut down, or the socket is closed. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        /* Receive side shut down: report readable so the reader wakes up. */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? (any state other than REQUESTING or RESPOND) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable as soon as receive memory is charged to the socket. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost. Hence the write
                                 * space is tested a second time here.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
393
394 EXPORT_SYMBOL_GPL(dccp_poll);
395
396 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
397 {
398         int rc = -ENOTCONN;
399
400         lock_sock(sk);
401
402         if (sk->sk_state == DCCP_LISTEN)
403                 goto out;
404
405         switch (cmd) {
406         case SIOCINQ: {
407                 struct sk_buff *skb;
408                 unsigned long amount = 0;
409
410                 skb = skb_peek(&sk->sk_receive_queue);
411                 if (skb != NULL) {
412                         /*
413                          * We will only return the amount of this packet since
414                          * that is all that will be read.
415                          */
416                         amount = skb->len;
417                 }
418                 rc = put_user(amount, (int __user *)arg);
419         }
420                 break;
421         default:
422                 rc = -ENOIOCTLCMD;
423                 break;
424         }
425 out:
426         release_sock(sk);
427         return rc;
428 }
429
430 EXPORT_SYMBOL_GPL(dccp_ioctl);
431
432 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
433                                    char __user *optval, int optlen)
434 {
435         struct dccp_sock *dp = dccp_sk(sk);
436         struct dccp_service_list *sl = NULL;
437
438         if (service == DCCP_SERVICE_INVALID_VALUE ||
439             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
440                 return -EINVAL;
441
442         if (optlen > sizeof(service)) {
443                 sl = kmalloc(optlen, GFP_KERNEL);
444                 if (sl == NULL)
445                         return -ENOMEM;
446
447                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
448                 if (copy_from_user(sl->dccpsl_list,
449                                    optval + sizeof(service),
450                                    optlen - sizeof(service)) ||
451                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
452                         kfree(sl);
453                         return -EFAULT;
454                 }
455         }
456
457         lock_sock(sk);
458         dp->dccps_service = service;
459
460         kfree(dp->dccps_service_list);
461
462         dp->dccps_service_list = sl;
463         release_sock(sk);
464         return 0;
465 }
466
467 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
468 {
469         u8 *list, len;
470         int i, rc;
471
472         if (cscov < 0 || cscov > 15)
473                 return -EINVAL;
474         /*
475          * Populate a list of permissible values, in the range cscov...15. This
476          * is necessary since feature negotiation of single values only works if
477          * both sides incidentally choose the same value. Since the list starts
478          * lowest-value first, negotiation will pick the smallest shared value.
479          */
480         if (cscov == 0)
481                 return 0;
482         len = 16 - cscov;
483
484         list = kmalloc(len, GFP_KERNEL);
485         if (list == NULL)
486                 return -ENOBUFS;
487
488         for (i = 0; i < len; i++)
489                 list[i] = cscov++;
490
491         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
492
493         if (rc == 0) {
494                 if (rx)
495                         dccp_sk(sk)->dccps_pcrlen = cscov;
496                 else
497                         dccp_sk(sk)->dccps_pcslen = cscov;
498         }
499         kfree(list);
500         return rc;
501 }
502
503 static int dccp_setsockopt_ccid(struct sock *sk, int type,
504                                 char __user *optval, int optlen)
505 {
506         u8 *val;
507         int rc = 0;
508
509         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
510                 return -EINVAL;
511
512         val = kmalloc(optlen, GFP_KERNEL);
513         if (val == NULL)
514                 return -ENOMEM;
515
516         if (copy_from_user(val, optval, optlen)) {
517                 kfree(val);
518                 return -EFAULT;
519         }
520
521         lock_sock(sk);
522         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
523                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
524
525         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
526                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
527         release_sock(sk);
528
529         kfree(val);
530         return rc;
531 }
532
533 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
534                 char __user *optval, int optlen)
535 {
536         struct dccp_sock *dp = dccp_sk(sk);
537         int val, err = 0;
538
539         switch (optname) {
540         case DCCP_SOCKOPT_PACKET_SIZE:
541                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
542                 return 0;
543         case DCCP_SOCKOPT_CHANGE_L:
544         case DCCP_SOCKOPT_CHANGE_R:
545                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
546                 return 0;
547         case DCCP_SOCKOPT_CCID:
548         case DCCP_SOCKOPT_RX_CCID:
549         case DCCP_SOCKOPT_TX_CCID:
550                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
551         }
552
553         if (optlen < (int)sizeof(int))
554                 return -EINVAL;
555
556         if (get_user(val, (int __user *)optval))
557                 return -EFAULT;
558
559         if (optname == DCCP_SOCKOPT_SERVICE)
560                 return dccp_setsockopt_service(sk, val, optval, optlen);
561
562         lock_sock(sk);
563         switch (optname) {
564         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
565                 if (dp->dccps_role != DCCP_ROLE_SERVER)
566                         err = -EOPNOTSUPP;
567                 else
568                         dp->dccps_server_timewait = (val != 0);
569                 break;
570         case DCCP_SOCKOPT_SEND_CSCOV:
571                 err = dccp_setsockopt_cscov(sk, val, false);
572                 break;
573         case DCCP_SOCKOPT_RECV_CSCOV:
574                 err = dccp_setsockopt_cscov(sk, val, true);
575                 break;
576         default:
577                 err = -ENOPROTOOPT;
578                 break;
579         }
580         release_sock(sk);
581
582         return err;
583 }
584
585 int dccp_setsockopt(struct sock *sk, int level, int optname,
586                     char __user *optval, int optlen)
587 {
588         if (level != SOL_DCCP)
589                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
590                                                              optname, optval,
591                                                              optlen);
592         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
593 }
594
595 EXPORT_SYMBOL_GPL(dccp_setsockopt);
596
597 #ifdef CONFIG_COMPAT
598 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
599                            char __user *optval, int optlen)
600 {
601         if (level != SOL_DCCP)
602                 return inet_csk_compat_setsockopt(sk, level, optname,
603                                                   optval, optlen);
604         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
605 }
606
607 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
608 #endif
609
610 static int dccp_getsockopt_service(struct sock *sk, int len,
611                                    __be32 __user *optval,
612                                    int __user *optlen)
613 {
614         const struct dccp_sock *dp = dccp_sk(sk);
615         const struct dccp_service_list *sl;
616         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
617
618         lock_sock(sk);
619         if ((sl = dp->dccps_service_list) != NULL) {
620                 slen = sl->dccpsl_nr * sizeof(u32);
621                 total_len += slen;
622         }
623
624         err = -EINVAL;
625         if (total_len > len)
626                 goto out;
627
628         err = 0;
629         if (put_user(total_len, optlen) ||
630             put_user(dp->dccps_service, optval) ||
631             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
632                 err = -EFAULT;
633 out:
634         release_sock(sk);
635         return err;
636 }
637
638 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639                     char __user *optval, int __user *optlen)
640 {
641         struct dccp_sock *dp;
642         int val, len;
643
644         if (get_user(len, optlen))
645                 return -EFAULT;
646
647         if (len < (int)sizeof(int))
648                 return -EINVAL;
649
650         dp = dccp_sk(sk);
651
652         switch (optname) {
653         case DCCP_SOCKOPT_PACKET_SIZE:
654                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
655                 return 0;
656         case DCCP_SOCKOPT_SERVICE:
657                 return dccp_getsockopt_service(sk, len,
658                                                (__be32 __user *)optval, optlen);
659         case DCCP_SOCKOPT_GET_CUR_MPS:
660                 val = dp->dccps_mss_cache;
661                 break;
662         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
663                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
664         case DCCP_SOCKOPT_TX_CCID:
665                 val = ccid_get_current_tx_ccid(dp);
666                 if (val < 0)
667                         return -ENOPROTOOPT;
668                 break;
669         case DCCP_SOCKOPT_RX_CCID:
670                 val = ccid_get_current_rx_ccid(dp);
671                 if (val < 0)
672                         return -ENOPROTOOPT;
673                 break;
674         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
675                 val = dp->dccps_server_timewait;
676                 break;
677         case DCCP_SOCKOPT_SEND_CSCOV:
678                 val = dp->dccps_pcslen;
679                 break;
680         case DCCP_SOCKOPT_RECV_CSCOV:
681                 val = dp->dccps_pcrlen;
682                 break;
683         case 128 ... 191:
684                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
685                                              len, (u32 __user *)optval, optlen);
686         case 192 ... 255:
687                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
688                                              len, (u32 __user *)optval, optlen);
689         default:
690                 return -ENOPROTOOPT;
691         }
692
693         len = sizeof(val);
694         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
695                 return -EFAULT;
696
697         return 0;
698 }
699
700 int dccp_getsockopt(struct sock *sk, int level, int optname,
701                     char __user *optval, int __user *optlen)
702 {
703         if (level != SOL_DCCP)
704                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
705                                                              optname, optval,
706                                                              optlen);
707         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
708 }
709
710 EXPORT_SYMBOL_GPL(dccp_getsockopt);
711
712 #ifdef CONFIG_COMPAT
713 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
714                            char __user *optval, int __user *optlen)
715 {
716         if (level != SOL_DCCP)
717                 return inet_csk_compat_getsockopt(sk, level, optname,
718                                                   optval, optlen);
719         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
720 }
721
722 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
723 #endif
724
/*
 * dccp_sendmsg  -  queue one datagram for transmission
 * @iocb: not used by this function
 * @sk:   sending socket
 * @msg:  message header; msg_flags and msg_iov are read
 * @len:  payload length in bytes
 *
 * Returns @len on success or a negative error: -EMSGSIZE when @len exceeds
 * dccps_mss_cache, -EAGAIN when the write queue already holds
 * sysctl_dccp_tx_qlen packets, or whatever sk_stream_wait_connect(),
 * sock_alloc_send_skb() or memcpy_fromiovec() report.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        /* Checked before the socket lock is taken. */
        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* A sysctl value of 0 disables the queue length limit. */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works
         * (NOTE(review): the original sentence was cut short -- confirm).
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /*
         * The socket lock is dropped around the allocation and re-taken
         * afterwards. NOTE(review): the socket state is not re-checked after
         * re-locking -- confirm that this is safe.
         */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        /* Leave headroom for the protocol headers, then copy the payload. */
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        /* rc is non-zero only on the error paths; otherwise report len. */
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}
778
779 EXPORT_SYMBOL_GPL(dccp_sendmsg);
780
/*
 * dccp_recvmsg  -  receive one datagram
 * @iocb:     not used by this function
 * @sk:       receiving socket
 * @msg:      message header; data is copied to msg_iov, and MSG_TRUNC is
 *            set in msg_flags when the packet is longer than @len
 * @len:      size of the user buffer
 * @nonblock: passed to sock_rcvtimeo() to derive the wait timeout
 * @flags:    receive flags; MSG_PEEK leaves the packet on the queue
 * @addr_len: not used by this function
 *
 * Returns the number of bytes copied, 0 when a Close/CloseReq/Reset packet
 * is found or the connection is finished, or a negative error. Note that
 * @len (a size_t) doubles as the return value, so negative error codes pass
 * through it before being returned as int.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                /* Empty queue: decide whether to fail, finish or wait. */
                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* A peek must not advance the closing handshake. */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Any other packet type is discarded. */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* A zero timeout means the caller does not want to wait. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* Wait for data, then re-examine the queue from the top. */
                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Only this one packet is delivered per call. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* Consume the packet unless the caller was only peeking. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
884
885 EXPORT_SYMBOL_GPL(dccp_recvmsg);
886
887 int inet_dccp_listen(struct socket *sock, int backlog)
888 {
889         struct sock *sk = sock->sk;
890         unsigned char old_state;
891         int err;
892
893         lock_sock(sk);
894
895         err = -EINVAL;
896         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
897                 goto out;
898
899         old_state = sk->sk_state;
900         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
901                 goto out;
902
903         /* Really, if the socket is already in listen state
904          * we can only allow the backlog to be adjusted.
905          */
906         if (old_state != DCCP_LISTEN) {
907                 /*
908                  * FIXME: here it probably should be sk->sk_prot->listen_start
909                  * see tcp_listen_start
910                  */
911                 err = dccp_listen_start(sk, backlog);
912                 if (err)
913                         goto out;
914         }
915         sk->sk_max_ack_backlog = backlog;
916         err = 0;
917
918 out:
919         release_sock(sk);
920         return err;
921 }
922
923 EXPORT_SYMBOL_GPL(inet_dccp_listen);
924
925 static void dccp_terminate_connection(struct sock *sk)
926 {
927         u8 next_state = DCCP_CLOSED;
928
929         switch (sk->sk_state) {
930         case DCCP_PASSIVE_CLOSE:
931         case DCCP_PASSIVE_CLOSEREQ:
932                 dccp_finish_passive_close(sk);
933                 break;
934         case DCCP_PARTOPEN:
935                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
936                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
937                 /* fall through */
938         case DCCP_OPEN:
939                 dccp_send_close(sk, 1);
940
941                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
942                     !dccp_sk(sk)->dccps_server_timewait)
943                         next_state = DCCP_ACTIVE_CLOSEREQ;
944                 else
945                         next_state = DCCP_CLOSING;
946                 /* fall through */
947         default:
948                 dccp_set_state(sk, next_state);
949         }
950 }
951
952 void dccp_close(struct sock *sk, long timeout)
953 {
954         struct dccp_sock *dp = dccp_sk(sk);
955         struct sk_buff *skb;
956         u32 data_was_unread = 0;
957         int state;
958
959         lock_sock(sk);
960
961         sk->sk_shutdown = SHUTDOWN_MASK;
962
963         if (sk->sk_state == DCCP_LISTEN) {
964                 dccp_set_state(sk, DCCP_CLOSED);
965
966                 /* Special case. */
967                 inet_csk_listen_stop(sk);
968
969                 goto adjudge_to_death;
970         }
971
972         sk_stop_timer(sk, &dp->dccps_xmit_timer);
973
974         /*
975          * We need to flush the recv. buffs.  We do this only on the
976          * descriptor close, not protocol-sourced closes, because the
977           *reader process may not have drained the data yet!
978          */
979         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
980                 data_was_unread += skb->len;
981                 __kfree_skb(skb);
982         }
983
984         if (data_was_unread) {
985                 /* Unread data was tossed, send an appropriate Reset Code */
986                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
987                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
988                 dccp_set_state(sk, DCCP_CLOSED);
989         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
990                 /* Check zero linger _after_ checking for unread data. */
991                 sk->sk_prot->disconnect(sk, 0);
992         } else if (sk->sk_state != DCCP_CLOSED) {
993                 dccp_terminate_connection(sk);
994         }
995
996         sk_stream_wait_close(sk, timeout);
997
998 adjudge_to_death:
999         state = sk->sk_state;
1000         sock_hold(sk);
1001         sock_orphan(sk);
1002         percpu_counter_inc(sk->sk_prot->orphan_count);
1003
1004         /*
1005          * It is the last release_sock in its life. It will remove backlog.
1006          */
1007         release_sock(sk);
1008         /*
1009          * Now socket is owned by kernel and we acquire BH lock
1010          * to finish close. No need to check for user refs.
1011          */
1012         local_bh_disable();
1013         bh_lock_sock(sk);
1014         WARN_ON(sock_owned_by_user(sk));
1015
1016         /* Have we already been destroyed by a softirq or backlog? */
1017         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1018                 goto out;
1019
1020         if (sk->sk_state == DCCP_CLOSED)
1021                 inet_csk_destroy_sock(sk);
1022
1023         /* Otherwise, socket is reprieved until protocol close. */
1024
1025 out:
1026         bh_unlock_sock(sk);
1027         local_bh_enable();
1028         sock_put(sk);
1029 }
1030
1031 EXPORT_SYMBOL_GPL(dccp_close);
1032
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket the call was made on (not used)
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: it only emits a debug message containing @how.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1039
1040 static inline int dccp_mib_init(void)
1041 {
1042         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1043 }
1044
1045 static inline void dccp_mib_exit(void)
1046 {
1047         snmp_mib_free((void**)dccp_statistics);
1048 }
1049
1050 static int thash_entries;
1051 module_param(thash_entries, int, 0444);
1052 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1053
1054 #ifdef CONFIG_IP_DCCP_DEBUG
1055 int dccp_debug;
1056 module_param(dccp_debug, bool, 0644);
1057 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1058
1059 EXPORT_SYMBOL_GPL(dccp_debug);
1060 #endif
1061
1062 static int __init dccp_init(void)
1063 {
1064         unsigned long goal;
1065         int ehash_order, bhash_order, i;
1066         int rc;
1067
1068         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1069                      FIELD_SIZEOF(struct sk_buff, cb));
1070         rc = percpu_counter_init(&dccp_orphan_count, 0);
1071         if (rc)
1072                 goto out;
1073         rc = -ENOBUFS;
1074         inet_hashinfo_init(&dccp_hashinfo);
1075         dccp_hashinfo.bind_bucket_cachep =
1076                 kmem_cache_create("dccp_bind_bucket",
1077                                   sizeof(struct inet_bind_bucket), 0,
1078                                   SLAB_HWCACHE_ALIGN, NULL);
1079         if (!dccp_hashinfo.bind_bucket_cachep)
1080                 goto out_free_percpu;
1081
1082         /*
1083          * Size and allocate the main established and bind bucket
1084          * hash tables.
1085          *
1086          * The methodology is similar to that of the buffer cache.
1087          */
1088         if (num_physpages >= (128 * 1024))
1089                 goal = num_physpages >> (21 - PAGE_SHIFT);
1090         else
1091                 goal = num_physpages >> (23 - PAGE_SHIFT);
1092
1093         if (thash_entries)
1094                 goal = (thash_entries *
1095                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1096         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1097                 ;
1098         do {
1099                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1100                                         sizeof(struct inet_ehash_bucket);
1101                 while (dccp_hashinfo.ehash_size &
1102                        (dccp_hashinfo.ehash_size - 1))
1103                         dccp_hashinfo.ehash_size--;
1104                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1105                         __get_free_pages(GFP_ATOMIC, ehash_order);
1106         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1107
1108         if (!dccp_hashinfo.ehash) {
1109                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1110                 goto out_free_bind_bucket_cachep;
1111         }
1112
1113         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1114                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1115                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1116         }
1117
1118         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1119                         goto out_free_dccp_ehash;
1120
1121         bhash_order = ehash_order;
1122
1123         do {
1124                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1125                                         sizeof(struct inet_bind_hashbucket);
1126                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1127                     bhash_order > 0)
1128                         continue;
1129                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1130                         __get_free_pages(GFP_ATOMIC, bhash_order);
1131         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1132
1133         if (!dccp_hashinfo.bhash) {
1134                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1135                 goto out_free_dccp_locks;
1136         }
1137
1138         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1139                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1140                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1141         }
1142
1143         rc = dccp_mib_init();
1144         if (rc)
1145                 goto out_free_dccp_bhash;
1146
1147         rc = dccp_ackvec_init();
1148         if (rc)
1149                 goto out_free_dccp_mib;
1150
1151         rc = dccp_sysctl_init();
1152         if (rc)
1153                 goto out_ackvec_exit;
1154
1155         dccp_timestamping_init();
1156 out:
1157         return rc;
1158 out_ackvec_exit:
1159         dccp_ackvec_exit();
1160 out_free_dccp_mib:
1161         dccp_mib_exit();
1162 out_free_dccp_bhash:
1163         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1164         dccp_hashinfo.bhash = NULL;
1165 out_free_dccp_locks:
1166         inet_ehash_locks_free(&dccp_hashinfo);
1167 out_free_dccp_ehash:
1168         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1169         dccp_hashinfo.ehash = NULL;
1170 out_free_bind_bucket_cachep:
1171         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1172         dccp_hashinfo.bind_bucket_cachep = NULL;
1173 out_free_percpu:
1174         percpu_counter_destroy(&dccp_orphan_count);
1175         goto out;
1176 }
1177
1178 static void __exit dccp_fini(void)
1179 {
1180         dccp_mib_exit();
1181         free_pages((unsigned long)dccp_hashinfo.bhash,
1182                    get_order(dccp_hashinfo.bhash_size *
1183                              sizeof(struct inet_bind_hashbucket)));
1184         free_pages((unsigned long)dccp_hashinfo.ehash,
1185                    get_order(dccp_hashinfo.ehash_size *
1186                              sizeof(struct inet_ehash_bucket)));
1187         inet_ehash_locks_free(&dccp_hashinfo);
1188         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1189         dccp_ackvec_exit();
1190         dccp_sysctl_exit();
1191 }
1192
/* Module entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");