nfsd: nfsd should drop CAP_MKNOD for non-root
[safe/jmp/linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 struct percpu_counter dccp_orphan_count;
44 EXPORT_SYMBOL_GPL(dccp_orphan_count);
45
46 struct inet_hashinfo dccp_hashinfo;
47 EXPORT_SYMBOL_GPL(dccp_hashinfo);
48
49 /* the maximum queue length for tx in packets. 0 is no limit */
50 int sysctl_dccp_tx_qlen __read_mostly = 5;
51
52 void dccp_set_state(struct sock *sk, const int state)
53 {
54         const int oldstate = sk->sk_state;
55
56         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
57                       dccp_state_name(oldstate), dccp_state_name(state));
58         WARN_ON(state == oldstate);
59
60         switch (state) {
61         case DCCP_OPEN:
62                 if (oldstate != DCCP_OPEN)
63                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
64                 /* Client retransmits all Confirm options until entering OPEN */
65                 if (oldstate == DCCP_PARTOPEN)
66                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
67                 break;
68
69         case DCCP_CLOSED:
70                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
71                     oldstate == DCCP_CLOSING)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 static void dccp_finish_passive_close(struct sock *sk)
93 {
94         switch (sk->sk_state) {
95         case DCCP_PASSIVE_CLOSE:
96                 /* Node (client or server) has received Close packet. */
97                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
98                 dccp_set_state(sk, DCCP_CLOSED);
99                 break;
100         case DCCP_PASSIVE_CLOSEREQ:
101                 /*
102                  * Client received CloseReq. We set the `active' flag so that
103                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
104                  */
105                 dccp_send_close(sk, 1);
106                 dccp_set_state(sk, DCCP_CLOSING);
107         }
108 }
109
110 void dccp_done(struct sock *sk)
111 {
112         dccp_set_state(sk, DCCP_CLOSED);
113         dccp_clear_xmit_timers(sk);
114
115         sk->sk_shutdown = SHUTDOWN_MASK;
116
117         if (!sock_flag(sk, SOCK_DEAD))
118                 sk->sk_state_change(sk);
119         else
120                 inet_csk_destroy_sock(sk);
121 }
122
123 EXPORT_SYMBOL_GPL(dccp_done);
124
125 const char *dccp_packet_name(const int type)
126 {
127         static const char *dccp_packet_names[] = {
128                 [DCCP_PKT_REQUEST]  = "REQUEST",
129                 [DCCP_PKT_RESPONSE] = "RESPONSE",
130                 [DCCP_PKT_DATA]     = "DATA",
131                 [DCCP_PKT_ACK]      = "ACK",
132                 [DCCP_PKT_DATAACK]  = "DATAACK",
133                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
134                 [DCCP_PKT_CLOSE]    = "CLOSE",
135                 [DCCP_PKT_RESET]    = "RESET",
136                 [DCCP_PKT_SYNC]     = "SYNC",
137                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
138         };
139
140         if (type >= DCCP_NR_PKT_TYPES)
141                 return "INVALID";
142         else
143                 return dccp_packet_names[type];
144 }
145
146 EXPORT_SYMBOL_GPL(dccp_packet_name);
147
148 const char *dccp_state_name(const int state)
149 {
150         static char *dccp_state_names[] = {
151         [DCCP_OPEN]             = "OPEN",
152         [DCCP_REQUESTING]       = "REQUESTING",
153         [DCCP_PARTOPEN]         = "PARTOPEN",
154         [DCCP_LISTEN]           = "LISTEN",
155         [DCCP_RESPOND]          = "RESPOND",
156         [DCCP_CLOSING]          = "CLOSING",
157         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
158         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
159         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
160         [DCCP_TIME_WAIT]        = "TIME_WAIT",
161         [DCCP_CLOSED]           = "CLOSED",
162         };
163
164         if (state >= DCCP_MAX_STATES)
165                 return "INVALID STATE!";
166         else
167                 return dccp_state_names[state];
168 }
169
170 EXPORT_SYMBOL_GPL(dccp_state_name);
171
172 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
173 {
174         struct dccp_sock *dp = dccp_sk(sk);
175         struct inet_connection_sock *icsk = inet_csk(sk);
176
177         dccp_minisock_init(&dp->dccps_minisock);
178
179         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
180         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
181         sk->sk_state            = DCCP_CLOSED;
182         sk->sk_write_space      = dccp_write_space;
183         icsk->icsk_sync_mss     = dccp_sync_mss;
184         dp->dccps_mss_cache     = 536;
185         dp->dccps_rate_last     = jiffies;
186         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
187         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
188         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
189
190         dccp_init_xmit_timers(sk);
191
192         INIT_LIST_HEAD(&dp->dccps_featneg);
193         /* control socket doesn't need feat nego */
194         if (likely(ctl_sock_initialized))
195                 return dccp_feat_init(sk);
196         return 0;
197 }
198
199 EXPORT_SYMBOL_GPL(dccp_init_sock);
200
201 void dccp_destroy_sock(struct sock *sk)
202 {
203         struct dccp_sock *dp = dccp_sk(sk);
204
205         /*
206          * DCCP doesn't use sk_write_queue, just sk_send_head
207          * for retransmissions
208          */
209         if (sk->sk_send_head != NULL) {
210                 kfree_skb(sk->sk_send_head);
211                 sk->sk_send_head = NULL;
212         }
213
214         /* Clean up a referenced DCCP bind bucket. */
215         if (inet_csk(sk)->icsk_bind_hash != NULL)
216                 inet_put_port(sk);
217
218         kfree(dp->dccps_service_list);
219         dp->dccps_service_list = NULL;
220
221         if (dp->dccps_hc_rx_ackvec != NULL) {
222                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
223                 dp->dccps_hc_rx_ackvec = NULL;
224         }
225         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
226         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
227         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228
229         /* clean up feature negotiation state */
230         dccp_feat_list_purge(&dp->dccps_featneg);
231 }
232
233 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
234
235 static inline int dccp_listen_start(struct sock *sk, int backlog)
236 {
237         struct dccp_sock *dp = dccp_sk(sk);
238
239         dp->dccps_role = DCCP_ROLE_LISTEN;
240         /* do not start to listen if feature negotiation setup fails */
241         if (dccp_feat_finalise_settings(dp))
242                 return -EPROTO;
243         return inet_csk_listen_start(sk, backlog);
244 }
245
246 static inline int dccp_need_reset(int state)
247 {
248         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
249                state != DCCP_REQUESTING;
250 }
251
252 int dccp_disconnect(struct sock *sk, int flags)
253 {
254         struct inet_connection_sock *icsk = inet_csk(sk);
255         struct inet_sock *inet = inet_sk(sk);
256         int err = 0;
257         const int old_state = sk->sk_state;
258
259         if (old_state != DCCP_CLOSED)
260                 dccp_set_state(sk, DCCP_CLOSED);
261
262         /*
263          * This corresponds to the ABORT function of RFC793, sec. 3.8
264          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
265          */
266         if (old_state == DCCP_LISTEN) {
267                 inet_csk_listen_stop(sk);
268         } else if (dccp_need_reset(old_state)) {
269                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
270                 sk->sk_err = ECONNRESET;
271         } else if (old_state == DCCP_REQUESTING)
272                 sk->sk_err = ECONNRESET;
273
274         dccp_clear_xmit_timers(sk);
275
276         __skb_queue_purge(&sk->sk_receive_queue);
277         __skb_queue_purge(&sk->sk_write_queue);
278         if (sk->sk_send_head != NULL) {
279                 __kfree_skb(sk->sk_send_head);
280                 sk->sk_send_head = NULL;
281         }
282
283         inet->dport = 0;
284
285         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
286                 inet_reset_saddr(sk);
287
288         sk->sk_shutdown = 0;
289         sock_reset_flag(sk, SOCK_DONE);
290
291         icsk->icsk_backoff = 0;
292         inet_csk_delack_init(sk);
293         __sk_dst_reset(sk);
294
295         WARN_ON(inet->num && !icsk->icsk_bind_hash);
296
297         sk->sk_error_report(sk);
298         return err;
299 }
300
301 EXPORT_SYMBOL_GPL(dccp_disconnect);
302
303 /*
304  *      Wait for a DCCP event.
305  *
306  *      Note that we don't need to lock the socket, as the upper poll layers
307  *      take care of normal races (between the test and the event) and we don't
308  *      go look at any of the socket buffers directly.
309  */
310 unsigned int dccp_poll(struct file *file, struct socket *sock,
311                        poll_table *wait)
312 {
313         unsigned int mask;
314         struct sock *sk = sock->sk;
315
316         poll_wait(file, sk->sk_sleep, wait);
317         if (sk->sk_state == DCCP_LISTEN)
318                 return inet_csk_listen_poll(sk);
319
320         /* Socket is not locked. We are protected from async events
321            by poll logic and correct handling of state changes
322            made by another threads is impossible in any case.
323          */
324
325         mask = 0;
326         if (sk->sk_err)
327                 mask = POLLERR;
328
329         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
330                 mask |= POLLHUP;
331         if (sk->sk_shutdown & RCV_SHUTDOWN)
332                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
333
334         /* Connected? */
335         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
336                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
337                         mask |= POLLIN | POLLRDNORM;
338
339                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
340                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
341                                 mask |= POLLOUT | POLLWRNORM;
342                         } else {  /* send SIGIO later */
343                                 set_bit(SOCK_ASYNC_NOSPACE,
344                                         &sk->sk_socket->flags);
345                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
346
347                                 /* Race breaker. If space is freed after
348                                  * wspace test but before the flags are set,
349                                  * IO signal will be lost.
350                                  */
351                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
352                                         mask |= POLLOUT | POLLWRNORM;
353                         }
354                 }
355         }
356         return mask;
357 }
358
359 EXPORT_SYMBOL_GPL(dccp_poll);
360
361 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
362 {
363         int rc = -ENOTCONN;
364
365         lock_sock(sk);
366
367         if (sk->sk_state == DCCP_LISTEN)
368                 goto out;
369
370         switch (cmd) {
371         case SIOCINQ: {
372                 struct sk_buff *skb;
373                 unsigned long amount = 0;
374
375                 skb = skb_peek(&sk->sk_receive_queue);
376                 if (skb != NULL) {
377                         /*
378                          * We will only return the amount of this packet since
379                          * that is all that will be read.
380                          */
381                         amount = skb->len;
382                 }
383                 rc = put_user(amount, (int __user *)arg);
384         }
385                 break;
386         default:
387                 rc = -ENOIOCTLCMD;
388                 break;
389         }
390 out:
391         release_sock(sk);
392         return rc;
393 }
394
395 EXPORT_SYMBOL_GPL(dccp_ioctl);
396
397 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
398                                    char __user *optval, int optlen)
399 {
400         struct dccp_sock *dp = dccp_sk(sk);
401         struct dccp_service_list *sl = NULL;
402
403         if (service == DCCP_SERVICE_INVALID_VALUE ||
404             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
405                 return -EINVAL;
406
407         if (optlen > sizeof(service)) {
408                 sl = kmalloc(optlen, GFP_KERNEL);
409                 if (sl == NULL)
410                         return -ENOMEM;
411
412                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
413                 if (copy_from_user(sl->dccpsl_list,
414                                    optval + sizeof(service),
415                                    optlen - sizeof(service)) ||
416                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
417                         kfree(sl);
418                         return -EFAULT;
419                 }
420         }
421
422         lock_sock(sk);
423         dp->dccps_service = service;
424
425         kfree(dp->dccps_service_list);
426
427         dp->dccps_service_list = sl;
428         release_sock(sk);
429         return 0;
430 }
431
432 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
433 {
434         u8 *list, len;
435         int i, rc;
436
437         if (cscov < 0 || cscov > 15)
438                 return -EINVAL;
439         /*
440          * Populate a list of permissible values, in the range cscov...15. This
441          * is necessary since feature negotiation of single values only works if
442          * both sides incidentally choose the same value. Since the list starts
443          * lowest-value first, negotiation will pick the smallest shared value.
444          */
445         if (cscov == 0)
446                 return 0;
447         len = 16 - cscov;
448
449         list = kmalloc(len, GFP_KERNEL);
450         if (list == NULL)
451                 return -ENOBUFS;
452
453         for (i = 0; i < len; i++)
454                 list[i] = cscov++;
455
456         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
457
458         if (rc == 0) {
459                 if (rx)
460                         dccp_sk(sk)->dccps_pcrlen = cscov;
461                 else
462                         dccp_sk(sk)->dccps_pcslen = cscov;
463         }
464         kfree(list);
465         return rc;
466 }
467
468 static int dccp_setsockopt_ccid(struct sock *sk, int type,
469                                 char __user *optval, int optlen)
470 {
471         u8 *val;
472         int rc = 0;
473
474         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
475                 return -EINVAL;
476
477         val = kmalloc(optlen, GFP_KERNEL);
478         if (val == NULL)
479                 return -ENOMEM;
480
481         if (copy_from_user(val, optval, optlen)) {
482                 kfree(val);
483                 return -EFAULT;
484         }
485
486         lock_sock(sk);
487         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
488                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
489
490         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
491                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
492         release_sock(sk);
493
494         kfree(val);
495         return rc;
496 }
497
498 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
499                 char __user *optval, int optlen)
500 {
501         struct dccp_sock *dp = dccp_sk(sk);
502         int val, err = 0;
503
504         switch (optname) {
505         case DCCP_SOCKOPT_PACKET_SIZE:
506                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
507                 return 0;
508         case DCCP_SOCKOPT_CHANGE_L:
509         case DCCP_SOCKOPT_CHANGE_R:
510                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
511                 return 0;
512         case DCCP_SOCKOPT_CCID:
513         case DCCP_SOCKOPT_RX_CCID:
514         case DCCP_SOCKOPT_TX_CCID:
515                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
516         }
517
518         if (optlen < (int)sizeof(int))
519                 return -EINVAL;
520
521         if (get_user(val, (int __user *)optval))
522                 return -EFAULT;
523
524         if (optname == DCCP_SOCKOPT_SERVICE)
525                 return dccp_setsockopt_service(sk, val, optval, optlen);
526
527         lock_sock(sk);
528         switch (optname) {
529         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
530                 if (dp->dccps_role != DCCP_ROLE_SERVER)
531                         err = -EOPNOTSUPP;
532                 else
533                         dp->dccps_server_timewait = (val != 0);
534                 break;
535         case DCCP_SOCKOPT_SEND_CSCOV:
536                 err = dccp_setsockopt_cscov(sk, val, false);
537                 break;
538         case DCCP_SOCKOPT_RECV_CSCOV:
539                 err = dccp_setsockopt_cscov(sk, val, true);
540                 break;
541         default:
542                 err = -ENOPROTOOPT;
543                 break;
544         }
545         release_sock(sk);
546
547         return err;
548 }
549
550 int dccp_setsockopt(struct sock *sk, int level, int optname,
551                     char __user *optval, int optlen)
552 {
553         if (level != SOL_DCCP)
554                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
555                                                              optname, optval,
556                                                              optlen);
557         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
558 }
559
560 EXPORT_SYMBOL_GPL(dccp_setsockopt);
561
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt entry point: SOL_DCCP options share the native
 * implementation, all other levels go through inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
574
575 static int dccp_getsockopt_service(struct sock *sk, int len,
576                                    __be32 __user *optval,
577                                    int __user *optlen)
578 {
579         const struct dccp_sock *dp = dccp_sk(sk);
580         const struct dccp_service_list *sl;
581         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
582
583         lock_sock(sk);
584         if ((sl = dp->dccps_service_list) != NULL) {
585                 slen = sl->dccpsl_nr * sizeof(u32);
586                 total_len += slen;
587         }
588
589         err = -EINVAL;
590         if (total_len > len)
591                 goto out;
592
593         err = 0;
594         if (put_user(total_len, optlen) ||
595             put_user(dp->dccps_service, optval) ||
596             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
597                 err = -EFAULT;
598 out:
599         release_sock(sk);
600         return err;
601 }
602
603 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
604                     char __user *optval, int __user *optlen)
605 {
606         struct dccp_sock *dp;
607         int val, len;
608
609         if (get_user(len, optlen))
610                 return -EFAULT;
611
612         if (len < (int)sizeof(int))
613                 return -EINVAL;
614
615         dp = dccp_sk(sk);
616
617         switch (optname) {
618         case DCCP_SOCKOPT_PACKET_SIZE:
619                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
620                 return 0;
621         case DCCP_SOCKOPT_SERVICE:
622                 return dccp_getsockopt_service(sk, len,
623                                                (__be32 __user *)optval, optlen);
624         case DCCP_SOCKOPT_GET_CUR_MPS:
625                 val = dp->dccps_mss_cache;
626                 break;
627         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
628                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
629         case DCCP_SOCKOPT_TX_CCID:
630                 val = ccid_get_current_tx_ccid(dp);
631                 if (val < 0)
632                         return -ENOPROTOOPT;
633                 break;
634         case DCCP_SOCKOPT_RX_CCID:
635                 val = ccid_get_current_rx_ccid(dp);
636                 if (val < 0)
637                         return -ENOPROTOOPT;
638                 break;
639         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
640                 val = dp->dccps_server_timewait;
641                 break;
642         case DCCP_SOCKOPT_SEND_CSCOV:
643                 val = dp->dccps_pcslen;
644                 break;
645         case DCCP_SOCKOPT_RECV_CSCOV:
646                 val = dp->dccps_pcrlen;
647                 break;
648         case 128 ... 191:
649                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
650                                              len, (u32 __user *)optval, optlen);
651         case 192 ... 255:
652                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
653                                              len, (u32 __user *)optval, optlen);
654         default:
655                 return -ENOPROTOOPT;
656         }
657
658         len = sizeof(val);
659         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
660                 return -EFAULT;
661
662         return 0;
663 }
664
665 int dccp_getsockopt(struct sock *sk, int level, int optname,
666                     char __user *optval, int __user *optlen)
667 {
668         if (level != SOL_DCCP)
669                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
670                                                              optname, optval,
671                                                              optlen);
672         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
673 }
674
675 EXPORT_SYMBOL_GPL(dccp_getsockopt);
676
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: SOL_DCCP options share the native
 * implementation, all other levels go through inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
689
690 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
691                  size_t len)
692 {
693         const struct dccp_sock *dp = dccp_sk(sk);
694         const int flags = msg->msg_flags;
695         const int noblock = flags & MSG_DONTWAIT;
696         struct sk_buff *skb;
697         int rc, size;
698         long timeo;
699
700         if (len > dp->dccps_mss_cache)
701                 return -EMSGSIZE;
702
703         lock_sock(sk);
704
705         if (sysctl_dccp_tx_qlen &&
706             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
707                 rc = -EAGAIN;
708                 goto out_release;
709         }
710
711         timeo = sock_sndtimeo(sk, noblock);
712
713         /*
714          * We have to use sk_stream_wait_connect here to set sk_write_pending,
715          * so that the trick in dccp_rcv_request_sent_state_process.
716          */
717         /* Wait for a connection to finish. */
718         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
719                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
720                         goto out_release;
721
722         size = sk->sk_prot->max_header + len;
723         release_sock(sk);
724         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
725         lock_sock(sk);
726         if (skb == NULL)
727                 goto out_release;
728
729         skb_reserve(skb, sk->sk_prot->max_header);
730         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
731         if (rc != 0)
732                 goto out_discard;
733
734         skb_queue_tail(&sk->sk_write_queue, skb);
735         dccp_write_xmit(sk,0);
736 out_release:
737         release_sock(sk);
738         return rc ? : len;
739 out_discard:
740         kfree_skb(skb);
741         goto out_release;
742 }
743
744 EXPORT_SYMBOL_GPL(dccp_sendmsg);
745
746 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
747                  size_t len, int nonblock, int flags, int *addr_len)
748 {
749         const struct dccp_hdr *dh;
750         long timeo;
751
752         lock_sock(sk);
753
754         if (sk->sk_state == DCCP_LISTEN) {
755                 len = -ENOTCONN;
756                 goto out;
757         }
758
759         timeo = sock_rcvtimeo(sk, nonblock);
760
761         do {
762                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
763
764                 if (skb == NULL)
765                         goto verify_sock_status;
766
767                 dh = dccp_hdr(skb);
768
769                 switch (dh->dccph_type) {
770                 case DCCP_PKT_DATA:
771                 case DCCP_PKT_DATAACK:
772                         goto found_ok_skb;
773
774                 case DCCP_PKT_CLOSE:
775                 case DCCP_PKT_CLOSEREQ:
776                         if (!(flags & MSG_PEEK))
777                                 dccp_finish_passive_close(sk);
778                         /* fall through */
779                 case DCCP_PKT_RESET:
780                         dccp_pr_debug("found fin (%s) ok!\n",
781                                       dccp_packet_name(dh->dccph_type));
782                         len = 0;
783                         goto found_fin_ok;
784                 default:
785                         dccp_pr_debug("packet_type=%s\n",
786                                       dccp_packet_name(dh->dccph_type));
787                         sk_eat_skb(sk, skb, 0);
788                 }
789 verify_sock_status:
790                 if (sock_flag(sk, SOCK_DONE)) {
791                         len = 0;
792                         break;
793                 }
794
795                 if (sk->sk_err) {
796                         len = sock_error(sk);
797                         break;
798                 }
799
800                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
801                         len = 0;
802                         break;
803                 }
804
805                 if (sk->sk_state == DCCP_CLOSED) {
806                         if (!sock_flag(sk, SOCK_DONE)) {
807                                 /* This occurs when user tries to read
808                                  * from never connected socket.
809                                  */
810                                 len = -ENOTCONN;
811                                 break;
812                         }
813                         len = 0;
814                         break;
815                 }
816
817                 if (!timeo) {
818                         len = -EAGAIN;
819                         break;
820                 }
821
822                 if (signal_pending(current)) {
823                         len = sock_intr_errno(timeo);
824                         break;
825                 }
826
827                 sk_wait_data(sk, &timeo);
828                 continue;
829         found_ok_skb:
830                 if (len > skb->len)
831                         len = skb->len;
832                 else if (len < skb->len)
833                         msg->msg_flags |= MSG_TRUNC;
834
835                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
836                         /* Exception. Bailout! */
837                         len = -EFAULT;
838                         break;
839                 }
840         found_fin_ok:
841                 if (!(flags & MSG_PEEK))
842                         sk_eat_skb(sk, skb, 0);
843                 break;
844         } while (1);
845 out:
846         release_sock(sk);
847         return len;
848 }
849
850 EXPORT_SYMBOL_GPL(dccp_recvmsg);
851
852 int inet_dccp_listen(struct socket *sock, int backlog)
853 {
854         struct sock *sk = sock->sk;
855         unsigned char old_state;
856         int err;
857
858         lock_sock(sk);
859
860         err = -EINVAL;
861         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
862                 goto out;
863
864         old_state = sk->sk_state;
865         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
866                 goto out;
867
868         /* Really, if the socket is already in listen state
869          * we can only allow the backlog to be adjusted.
870          */
871         if (old_state != DCCP_LISTEN) {
872                 /*
873                  * FIXME: here it probably should be sk->sk_prot->listen_start
874                  * see tcp_listen_start
875                  */
876                 err = dccp_listen_start(sk, backlog);
877                 if (err)
878                         goto out;
879         }
880         sk->sk_max_ack_backlog = backlog;
881         err = 0;
882
883 out:
884         release_sock(sk);
885         return err;
886 }
887
888 EXPORT_SYMBOL_GPL(inet_dccp_listen);
889
890 static void dccp_terminate_connection(struct sock *sk)
891 {
892         u8 next_state = DCCP_CLOSED;
893
894         switch (sk->sk_state) {
895         case DCCP_PASSIVE_CLOSE:
896         case DCCP_PASSIVE_CLOSEREQ:
897                 dccp_finish_passive_close(sk);
898                 break;
899         case DCCP_PARTOPEN:
900                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
901                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
902                 /* fall through */
903         case DCCP_OPEN:
904                 dccp_send_close(sk, 1);
905
906                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
907                     !dccp_sk(sk)->dccps_server_timewait)
908                         next_state = DCCP_ACTIVE_CLOSEREQ;
909                 else
910                         next_state = DCCP_CLOSING;
911                 /* fall through */
912         default:
913                 dccp_set_state(sk, next_state);
914         }
915 }
916
917 void dccp_close(struct sock *sk, long timeout)
918 {
919         struct dccp_sock *dp = dccp_sk(sk);
920         struct sk_buff *skb;
921         u32 data_was_unread = 0;
922         int state;
923
924         lock_sock(sk);
925
926         sk->sk_shutdown = SHUTDOWN_MASK;
927
928         if (sk->sk_state == DCCP_LISTEN) {
929                 dccp_set_state(sk, DCCP_CLOSED);
930
931                 /* Special case. */
932                 inet_csk_listen_stop(sk);
933
934                 goto adjudge_to_death;
935         }
936
937         sk_stop_timer(sk, &dp->dccps_xmit_timer);
938
939         /*
940          * We need to flush the recv. buffs.  We do this only on the
941          * descriptor close, not protocol-sourced closes, because the
942           *reader process may not have drained the data yet!
943          */
944         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
945                 data_was_unread += skb->len;
946                 __kfree_skb(skb);
947         }
948
949         if (data_was_unread) {
950                 /* Unread data was tossed, send an appropriate Reset Code */
951                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
952                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
953                 dccp_set_state(sk, DCCP_CLOSED);
954         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
955                 /* Check zero linger _after_ checking for unread data. */
956                 sk->sk_prot->disconnect(sk, 0);
957         } else if (sk->sk_state != DCCP_CLOSED) {
958                 dccp_terminate_connection(sk);
959         }
960
961         sk_stream_wait_close(sk, timeout);
962
963 adjudge_to_death:
964         state = sk->sk_state;
965         sock_hold(sk);
966         sock_orphan(sk);
967
968         /*
969          * It is the last release_sock in its life. It will remove backlog.
970          */
971         release_sock(sk);
972         /*
973          * Now socket is owned by kernel and we acquire BH lock
974          * to finish close. No need to check for user refs.
975          */
976         local_bh_disable();
977         bh_lock_sock(sk);
978         WARN_ON(sock_owned_by_user(sk));
979
980         percpu_counter_inc(sk->sk_prot->orphan_count);
981
982         /* Have we already been destroyed by a softirq or backlog? */
983         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
984                 goto out;
985
986         if (sk->sk_state == DCCP_CLOSED)
987                 inet_csk_destroy_sock(sk);
988
989         /* Otherwise, socket is reprieved until protocol close. */
990
991 out:
992         bh_unlock_sock(sk);
993         local_bh_enable();
994         sock_put(sk);
995 }
996
997 EXPORT_SYMBOL_GPL(dccp_close);
998
/**
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket the call was made on (not used here)
 * @how: shutdown mode requested by the caller; only logged
 *
 * Emits a debug message and does nothing else.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1005
1006 static inline int dccp_mib_init(void)
1007 {
1008         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1009 }
1010
1011 static inline void dccp_mib_exit(void)
1012 {
1013         snmp_mib_free((void**)dccp_statistics);
1014 }
1015
/*
 * thash_entries: when non-zero, dccp_init() derives the size of the
 * established hash table from this value instead of from the amount of
 * physical memory.  Exposed as a module parameter with mode 0444.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_debug: debug message switch, only built with CONFIG_IP_DCCP_DEBUG.
 * Exposed as a module parameter with mode 0644 and exported to other
 * GPL modules.
 */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1027
1028 static int __init dccp_init(void)
1029 {
1030         unsigned long goal;
1031         int ehash_order, bhash_order, i;
1032         int rc;
1033
1034         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1035                      FIELD_SIZEOF(struct sk_buff, cb));
1036         rc = percpu_counter_init(&dccp_orphan_count, 0);
1037         if (rc)
1038                 goto out;
1039         rc = -ENOBUFS;
1040         inet_hashinfo_init(&dccp_hashinfo);
1041         dccp_hashinfo.bind_bucket_cachep =
1042                 kmem_cache_create("dccp_bind_bucket",
1043                                   sizeof(struct inet_bind_bucket), 0,
1044                                   SLAB_HWCACHE_ALIGN, NULL);
1045         if (!dccp_hashinfo.bind_bucket_cachep)
1046                 goto out_free_percpu;
1047
1048         /*
1049          * Size and allocate the main established and bind bucket
1050          * hash tables.
1051          *
1052          * The methodology is similar to that of the buffer cache.
1053          */
1054         if (num_physpages >= (128 * 1024))
1055                 goal = num_physpages >> (21 - PAGE_SHIFT);
1056         else
1057                 goal = num_physpages >> (23 - PAGE_SHIFT);
1058
1059         if (thash_entries)
1060                 goal = (thash_entries *
1061                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1062         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1063                 ;
1064         do {
1065                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1066                                         sizeof(struct inet_ehash_bucket);
1067                 while (dccp_hashinfo.ehash_size &
1068                        (dccp_hashinfo.ehash_size - 1))
1069                         dccp_hashinfo.ehash_size--;
1070                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1071                         __get_free_pages(GFP_ATOMIC, ehash_order);
1072         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1073
1074         if (!dccp_hashinfo.ehash) {
1075                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1076                 goto out_free_bind_bucket_cachep;
1077         }
1078
1079         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1080                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1081                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1082         }
1083
1084         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1085                         goto out_free_dccp_ehash;
1086
1087         bhash_order = ehash_order;
1088
1089         do {
1090                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1091                                         sizeof(struct inet_bind_hashbucket);
1092                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1093                     bhash_order > 0)
1094                         continue;
1095                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1096                         __get_free_pages(GFP_ATOMIC, bhash_order);
1097         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1098
1099         if (!dccp_hashinfo.bhash) {
1100                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1101                 goto out_free_dccp_locks;
1102         }
1103
1104         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1105                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1106                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1107         }
1108
1109         rc = dccp_mib_init();
1110         if (rc)
1111                 goto out_free_dccp_bhash;
1112
1113         rc = dccp_ackvec_init();
1114         if (rc)
1115                 goto out_free_dccp_mib;
1116
1117         rc = dccp_sysctl_init();
1118         if (rc)
1119                 goto out_ackvec_exit;
1120
1121         rc = ccid_initialize_builtins();
1122         if (rc)
1123                 goto out_sysctl_exit;
1124
1125         dccp_timestamping_init();
1126 out:
1127         return rc;
1128 out_sysctl_exit:
1129         dccp_sysctl_exit();
1130 out_ackvec_exit:
1131         dccp_ackvec_exit();
1132 out_free_dccp_mib:
1133         dccp_mib_exit();
1134 out_free_dccp_bhash:
1135         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1136         dccp_hashinfo.bhash = NULL;
1137 out_free_dccp_locks:
1138         inet_ehash_locks_free(&dccp_hashinfo);
1139 out_free_dccp_ehash:
1140         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1141         dccp_hashinfo.ehash = NULL;
1142 out_free_bind_bucket_cachep:
1143         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1144         dccp_hashinfo.bind_bucket_cachep = NULL;
1145 out_free_percpu:
1146         percpu_counter_destroy(&dccp_orphan_count);
1147         goto out;
1148 }
1149
1150 static void __exit dccp_fini(void)
1151 {
1152         ccid_cleanup_builtins();
1153         dccp_mib_exit();
1154         free_pages((unsigned long)dccp_hashinfo.bhash,
1155                    get_order(dccp_hashinfo.bhash_size *
1156                              sizeof(struct inet_bind_hashbucket)));
1157         free_pages((unsigned long)dccp_hashinfo.ehash,
1158                    get_order(dccp_hashinfo.ehash_size *
1159                              sizeof(struct inet_ehash_bucket)));
1160         inet_ehash_locks_free(&dccp_hashinfo);
1161         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1162         dccp_ackvec_exit();
1163         dccp_sysctl_exit();
1164 }
1165
/* Register dccp_init()/dccp_fini() as the module's entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");