2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * The User Datagram Protocol (UDP).
8 * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
14 * Hirokazu Takahashi, <taka@valinux.co.jp>
17 * Alan Cox : verify_area() calls
18 * Alan Cox : stopped close while in use off icmp
19 * messages. Not a fix but a botch that
20 * for udp at least is 'valid'.
21 * Alan Cox : Fixed icmp handling properly
22 * Alan Cox : Correct error for oversized datagrams
23 * Alan Cox : Tidied select() semantics.
24 * Alan Cox : udp_err() fixed properly, also now
25 * select and read wake correctly on errors
26 * Alan Cox : udp_send verify_area moved to avoid mem leak
27 * Alan Cox : UDP can count its memory
28 * Alan Cox : send to an unknown connection causes
29 * an ECONNREFUSED off the icmp, but
31 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
32 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
33 * bug no longer crashes it.
34 * Fred Van Kempen : Net2e support for sk->broadcast.
35 * Alan Cox : Uses skb_free_datagram
36 * Alan Cox : Added get/set sockopt support.
37 * Alan Cox : Broadcasting without option set returns EACCES.
38 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
39 * Alan Cox : Use ip_tos and ip_ttl
40 * Alan Cox : SNMP Mibs
41 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
42 * Matt Dillon : UDP length checks.
43 * Alan Cox : Smarter af_inet used properly.
44 * Alan Cox : Use new kernel side addressing.
45 * Alan Cox : Incorrect return on truncated datagram receive.
46 * Arnt Gulbrandsen : New udp_send and stuff
47 * Alan Cox : Cache last socket
48 * Alan Cox : Route cache
49 * Jon Peatfield : Minor efficiency fix to sendto().
50 * Mike Shaver : RFC1122 checks.
51 * Alan Cox : Nonblocking error fix.
52 * Willy Konynenberg : Transparent proxying support.
53 * Mike McLagan : Routing by source
54 * David S. Miller : New socket lookup architecture.
55 * Last socket cache retained as it
56 * does have a high hit rate.
57 * Olaf Kirch : Don't linearise iovec on sendmsg.
58 * Andi Kleen : Some cleanups, cache destination entry
60 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
61 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
62 * return ENOTCONN for unconnected sockets (POSIX)
63 * Janos Farkas : don't deliver multi/broadcasts to a different
64 * bound-to-device socket
65 * Hirokazu Takahashi : HW checksumming for outgoing UDP
67 * Hirokazu Takahashi : sendfile() on UDP works now.
68 * Arnaldo C. Melo : convert /proc/net/udp to seq_file
69 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
70 * Alexey Kuznetsov: allows both IPv4 and IPv6 sockets to bind
71 * a single port at the same time.
72 * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
73 * James Chapman : Add L2TP encapsulation type.
76 * This program is free software; you can redistribute it and/or
77 * modify it under the terms of the GNU General Public License
78 * as published by the Free Software Foundation; either version
79 * 2 of the License, or (at your option) any later version.
82 #include <asm/system.h>
83 #include <asm/uaccess.h>
84 #include <asm/ioctls.h>
85 #include <linux/bootmem.h>
86 #include <linux/types.h>
87 #include <linux/fcntl.h>
88 #include <linux/module.h>
89 #include <linux/socket.h>
90 #include <linux/sockios.h>
91 #include <linux/igmp.h>
93 #include <linux/errno.h>
94 #include <linux/timer.h>
96 #include <linux/inet.h>
97 #include <linux/netdevice.h>
98 #include <net/tcp_states.h>
99 #include <linux/skbuff.h>
100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h>
102 #include <net/net_namespace.h>
103 #include <net/icmp.h>
104 #include <net/route.h>
105 #include <net/checksum.h>
106 #include <net/xfrm.h>
107 #include "udp_impl.h"
110 * Snmp MIB for the UDP layer
/*
 * SNMP counter blocks of type struct udp_mib; both are exported.
 * NOTE(review): udp_stats_in6 is presumably the IPv6 counterpart of
 * udp_statistics -- confirm against its users.
 */
113 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
114 EXPORT_SYMBOL(udp_statistics);
116 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
117 EXPORT_SYMBOL(udp_stats_in6);
/*
 * Socket hash table handed to __udp_lib_get_port() by udp_get_port().
 * Chains are selected with port & (UDP_HTABLE_SIZE - 1).
 * udp_hash_lock is taken for writing in __udp_lib_get_port() and for
 * reading in udp_seq_start().
 */
119 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
120 DEFINE_RWLOCK(udp_hash_lock);
/*
 * Memory tunables. All three receive their initial values in udp_init():
 * sysctl_udp_mem[] from a limit derived from nr_all_pages, the two minima
 * from SK_MEM_QUANTUM.
 */
122 int sysctl_udp_mem[3] __read_mostly;
123 int sysctl_udp_rmem_min __read_mostly;
124 int sysctl_udp_wmem_min __read_mostly;
126 EXPORT_SYMBOL(sysctl_udp_mem);
127 EXPORT_SYMBOL(sysctl_udp_rmem_min);
128 EXPORT_SYMBOL(sysctl_udp_wmem_min);
/*
 * Exported atomic counter. It is neither read nor modified in the visible
 * part of this file.
 */
130 atomic_t udp_memory_allocated;
131 EXPORT_SYMBOL(udp_memory_allocated);
/*
 * Scan the hash chain that @num maps to, udptable[num & (UDP_HTABLE_SIZE - 1)],
 * for a socket that belongs to namespace @net and is hashed under port @num.
 *
 * NOTE(review): the declaration of sk and the return statements are not part
 * of this listing. __udp_lib_get_port() negates the result while searching
 * for a free port, so a non-zero return presumably means that the port is in
 * use -- confirm.
 */
133 static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
134 const struct hlist_head udptable[])
137 struct hlist_node *node;
/* Only sockets on the chain selected by the low bits of num are examined. */
139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
140 if (sk->sk_net == net && sk->sk_hash == num)
146 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
148 * @sk: socket struct in question
149 * @snum: port number to look up
150 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
151 * @saddr_comp: AF-dependent comparison of bound local IP addresses
/*
 * Bind @sk to a local port and hash it into @udptable.
 *
 * Two paths are visible: an automatic search that starts from a random port
 * inside the local port range, and a conflict check for a specific @snum.
 * Both run with udp_hash_lock held for writing.
 *
 * NOTE(review): several lines of this function are missing from this listing
 * (the branch that chooses between the two paths, the declarations of sk2
 * and size, the gotos/labels and the return). Comments below that depend on
 * them are marked as assumptions.
 */
153 int __udp_lib_get_port(struct sock *sk, unsigned short snum,
154 struct hlist_head udptable[],
155 int (*saddr_comp)(const struct sock *sk1,
156 const struct sock *sk2 ) )
158 struct hlist_node *node;
159 struct hlist_head *head;
162 struct net *net = sk->sk_net;
/* Held until the write_unlock_bh() at the end of the function. */
164 write_lock_bh(&udp_hash_lock);
/*
 * Automatic port selection.
 * NOTE(review): presumably entered only when snum is 0 -- the guarding
 * condition is not visible here.
 */
167 int i, low, high, remaining;
168 unsigned rover, best, best_size_so_far;
/* Random starting point within [low, high]. */
170 inet_get_local_port_range(&low, &high);
171 remaining = (high - low) + 1;
173 best_size_so_far = UINT_MAX;
174 best = rover = net_random() % remaining + low;
176 /* 1st pass: look for empty (or shortest) hash chain */
177 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
180 head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
181 if (hlist_empty(head))
/* Count the chain, giving up once it is no shorter than the best so far. */
184 sk_for_each(sk2, node, head) {
185 if (++size >= best_size_so_far)
188 best_size_so_far = size;
191 /* fold back if end of range */
193 rover = low + ((rover - low)
194 & (UDP_HTABLE_SIZE - 1));
199 /* 2nd pass: find hole in shortest hash chain */
/*
 * Stepping rover by UDP_HTABLE_SIZE keeps rover & (UDP_HTABLE_SIZE - 1)
 * unchanged, so every candidate maps to the same chain.
 */
201 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
202 if (! __udp_lib_lport_inuse(net, rover, udptable))
204 rover += UDP_HTABLE_SIZE;
206 rover = low + ((rover - low)
207 & (UDP_HTABLE_SIZE - 1));
211 /* All ports in use! */
/*
 * Specific port requested: look for a conflicting socket on its chain.
 * A socket conflicts when it has the same port and namespace, at least one
 * of the two sockets has sk_reuse clear, the device bindings overlap (either
 * is unbound or both are bound to the same device) and saddr_comp() returns
 * non-zero.
 * NOTE(review): the action taken on a conflict is on a line not shown;
 * presumably the bind fails -- confirm.
 */
217 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
219 sk_for_each(sk2, node, head)
220 if (sk2->sk_hash == snum &&
222 sk2->sk_net == net &&
223 (!sk2->sk_reuse || !sk->sk_reuse) &&
224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
226 (*saddr_comp)(sk, sk2) )
/* Record the port on the inet socket. */
230 inet_sk(sk)->num = snum;
/* Insert into the chain only if not hashed yet, and count the socket as in use. */
232 if (sk_unhashed(sk)) {
233 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
234 sk_add_node(sk, head);
235 sock_prot_inuse_add(sk->sk_prot, 1);
239 write_unlock_bh(&udp_hash_lock);
/*
 * Port allocation entry point for plain UDP: forwards to
 * __udp_lib_get_port() with the udp_hash table and returns its result.
 * @scmp is passed through as the local address comparison callback.
 */
243 int udp_get_port(struct sock *sk, unsigned short snum,
244 int (*scmp)(const struct sock *, const struct sock *))
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
250 * IOCTL requests applicable to the UDP protocol
/*
 * UDP ioctl handler. Two queries are visible; each stores its answer into
 * the user int that @arg points to and returns the put_user() result.
 *
 * NOTE(review): the switch and its case labels are missing from this
 * listing, so the @cmd values selecting each branch cannot be read here
 * (presumably SIOCOUTQ and SIOCINQ -- confirm).
 */
253 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
/* First query: current value of sk_wmem_alloc for the socket. */
258 int amount = atomic_read(&sk->sk_wmem_alloc);
259 return put_user(amount, (int __user *)arg);
/*
 * Second query: size of the datagram at the head of the receive queue.
 * The queue lock is held while the head skb is inspected.
 */
265 unsigned long amount;
268 spin_lock_bh(&sk->sk_receive_queue.lock);
269 skb = skb_peek(&sk->sk_receive_queue);
272 * We will only return the amount
273 * of this packet since that is all
/* skb->len includes the UDP header, so it is subtracted. */
276 amount = skb->len - sizeof(struct udphdr);
278 spin_unlock_bh(&sk->sk_receive_queue.lock);
279 return put_user(amount, (int __user *)arg);
/*
 * Break the association of a UDP socket.
 *
 * Visible effects: the state becomes TCP_CLOSE and the device binding is
 * cleared; the source address is reset unless SOCK_BINDADDR_LOCK is set;
 * the socket is unhashed unless SOCK_BINDPORT_LOCK is set.
 *
 * NOTE(review): inet is not used on any visible line and @flags is not
 * referenced; the lines that use inet and the return statement are missing
 * from this listing.
 */
289 int udp_disconnect(struct sock *sk, int flags)
291 struct inet_sock *inet = inet_sk(sk);
293 * 1003.1g - break association.
296 sk->sk_state = TCP_CLOSE;
299 sk->sk_bound_dev_if = 0;
/* Keep the local address if it is locked via SOCK_BINDADDR_LOCK. */
300 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
301 inet_reset_saddr(sk);
/* Keep the socket hashed if the port is locked via SOCK_BINDPORT_LOCK. */
303 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
304 sk->sk_prot->unhash(sk);
312 * Socket option code for UDP
/*
 * Set a UDP or UDP-Lite socket option.
 *
 * The option value is read from @optval as an int; @optlen is compared
 * against sizeof(int) first. Option handling visible in this listing:
 *  - a call to the @push_pending_frames callback;
 *  - encapsulation types: the two ESP-in-UDP types install
 *    xfrm4_udp_encap_rcv as encap_rcv, and the value is stored in
 *    encap_type;
 *  - UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV, which are refused on
 *    non UDP-Lite sockets, treat non-zero values below 8 specially and set
 *    UDPLITE_SEND_CC or UDPLITE_RECV_CC in pcflag.
 *
 * NOTE(review): the switch statements, most case labels, the error returns
 * and the declarations of val and err are missing from this listing.
 * In particular it cannot be seen here which option triggers
 * push_pending_frames (presumably clearing UDP_CORK), nor whether the
 * ESP-in-UDP cases fall through into the L2TP case -- confirm both.
 */
314 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
315 char __user *optval, int optlen,
316 int (*push_pending_frames)(struct sock *))
318 struct udp_sock *up = udp_sk(sk);
321 #ifdef CONFIG_IP_UDPLITE
322 int is_udplite = IS_UDPLITE(sk);
/* The option value must be at least an int wide; it is fetched below. */
325 if (optlen<sizeof(int))
328 if (get_user(val, (int __user *)optval))
338 (*push_pending_frames)(sk);
346 case UDP_ENCAP_ESPINUDP:
347 case UDP_ENCAP_ESPINUDP_NON_IKE:
348 up->encap_rcv = xfrm4_udp_encap_rcv;
350 case UDP_ENCAP_L2TPINUDP:
351 up->encap_type = val;
359 #ifdef CONFIG_IP_UDPLITE
361 * UDP-Lite's partial checksum coverage (RFC 3828).
363 /* The sender sets actual checksum coverage length via this option.
364 * The case coverage > packet length is handled by send module. */
365 case UDPLITE_SEND_CSCOV:
366 if (!is_udplite) /* Disable the option on UDP sockets */
368 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
371 up->pcflag |= UDPLITE_SEND_CC;
374 /* The receiver specifies a minimum checksum coverage value. To make
375 * sense, this should be set to at least 8 (as done below). If zero is
376 * used, this again means full checksum coverage. */
377 case UDPLITE_RECV_CSCOV:
378 if (!is_udplite) /* Disable the option on UDP sockets */
380 if (val != 0 && val < 8) /* Avoid silly minimal values. */
383 up->pcflag |= UDPLITE_RECV_CC;
/*
 * Read a UDP or UDP-Lite socket option.
 *
 * The caller-supplied length is fetched from @optlen and clamped to
 * sizeof(int). The clamped length is written back to @optlen and that many
 * bytes of the value are copied to @optval.
 *
 * NOTE(review): the switch, most case labels, the error returns and the
 * declarations of val and len are missing from this listing; only the
 * encap_type read and the two UDP-Lite coverage labels are visible.
 */
395 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
396 char __user *optval, int __user *optlen)
398 struct udp_sock *up = udp_sk(sk);
401 if (get_user(len,optlen))
/* Never copy out more than one int. */
404 len = min_t(unsigned int, len, sizeof(int));
415 val = up->encap_type;
418 /* The following two cannot be changed on UDP sockets, the return is
419 * always 0 (which corresponds to the full checksum coverage of UDP). */
420 case UDPLITE_SEND_CSCOV:
424 case UDPLITE_RECV_CSCOV:
/* Report the length actually used, then the value itself. */
432 if (put_user(len, optlen))
434 if (copy_to_user(optval, &val,len))
440 * udp_poll - wait for a UDP event.
441 * @file: file struct
445 * This is the same as datagram poll, except for the special case of
446 * blocking sockets. If an application is using a blocking fd
447 * and a packet with a checksum error is in the queue,
448 * then it could get a return from select indicating data available
449 * but then block when reading it. Add special case code
450 * to work around these arguably broken applications.
/*
 * Poll handler: starts from the datagram_poll() mask. When that mask
 * reports POLLRDNORM, the file is blocking (O_NONBLOCK clear) and receive
 * has not been shut down, datagrams at the head of the receive queue that
 * fail udp_lib_checksum_complete() are unlinked and counted as
 * UDP_MIB_INERRORS. POLLIN and POLLRDNORM can then be cleared from the mask.
 *
 * NOTE(review): the declaration of skb, whatever is done with an skb after
 * it is unlinked, the condition guarding the mask update and the return
 * statement are missing from this listing.
 */
452 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
454 unsigned int mask = datagram_poll(file, sock, wait);
455 struct sock *sk = sock->sk;
456 int is_lite = IS_UDPLITE(sk);
458 /* Check for false positives due to checksum errors */
459 if ( (mask & POLLRDNORM) &&
460 !(file->f_flags & O_NONBLOCK) &&
461 !(sk->sk_shutdown & RCV_SHUTDOWN)){
462 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
/* The queue lock is held, so the unlocked __skb_unlink() variant is used. */
465 spin_lock_bh(&rcvq->lock);
466 while ((skb = skb_peek(rcvq)) != NULL &&
467 udp_lib_checksum_complete(skb)) {
468 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
469 __skb_unlink(skb, rcvq);
472 spin_unlock_bh(&rcvq->lock);
474 /* nothing to see, move along */
476 mask &= ~(POLLIN | POLLRDNORM);
484 /* ------------------------------------------------------------------------ */
485 #ifdef CONFIG_PROC_FS
/*
 * seq_file helper: walk the buckets of state->hashtable from bucket 0 and
 * look for a socket whose sk_family equals state->family. state->bucket is
 * the loop variable, so it reflects the bucket being scanned.
 *
 * NOTE(review): the declaration of sk, the action taken on a match and the
 * return statement are missing from this listing; presumably the first
 * matching socket (or NULL) is returned -- confirm.
 */
487 static struct sock *udp_get_first(struct seq_file *seq)
490 struct udp_iter_state *state = seq->private;
492 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
493 struct hlist_node *node;
494 sk_for_each(sk, node, state->hashtable + state->bucket) {
495 if (sk->sk_family == state->family)
/*
 * seq_file helper: advance from @sk to another socket of state->family.
 *
 * The do/while loop keeps going while the current socket exists and has a
 * different family. When sk becomes NULL and another bucket remains,
 * state->bucket is incremented and the scan resumes at the head of that
 * bucket.
 *
 * NOTE(review): the statement that steps sk inside the loop, the jump that
 * re-enters the loop after switching buckets and the return statement are
 * missing from this listing.
 */
504 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
506 struct udp_iter_state *state = seq->private;
512 } while (sk && sk->sk_family != state->family);
514 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
515 sk = sk_head(state->hashtable + state->bucket);
/*
 * seq_file helper: start at udp_get_first() and step with udp_get_next()
 * while @pos is non-zero and sockets remain. Returns NULL if @pos is still
 * non-zero when the walk stops, otherwise the socket reached.
 *
 * NOTE(review): the loop body is missing from this listing; presumably it
 * decrements pos -- confirm.
 */
521 static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
523 struct sock *sk = udp_get_first(seq);
526 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
528 return pos ? NULL : sk;
/*
 * seq_file start callback. Takes udp_hash_lock for reading; the matching
 * unlock is in udp_seq_stop(). At position 0 the token (void *)1 is
 * returned instead of a socket; otherwise the entry at index *pos - 1 is
 * looked up with udp_get_idx().
 */
531 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
532 __acquires(udp_hash_lock)
534 read_lock(&udp_hash_lock);
535 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
/*
 * seq_file next callback. Two ways of obtaining the next socket are
 * visible: udp_get_idx(seq, 0), which yields the first entry, and
 * udp_get_next(seq, v), which continues from the current entry @v.
 *
 * NOTE(review): the condition choosing between them, the declaration of sk,
 * any update of *pos and the return statement are missing from this
 * listing. Presumably the first form is used when @v is the (void *)1 token
 * returned by udp_seq_start() -- confirm.
 */
538 static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
543 sk = udp_get_idx(seq, 0);
545 sk = udp_get_next(seq, v);
/*
 * seq_file stop callback: drops the read lock on udp_hash_lock that
 * udp_seq_start() acquired. Neither @seq nor @v is used on the visible
 * lines.
 */
551 static void udp_seq_stop(struct seq_file *seq, void *v)
552 __releases(udp_hash_lock)
554 read_unlock(&udp_hash_lock);
/*
 * open() handler for the proc file. The udp_seq_afinfo attached to the
 * proc entry supplies the address family, the hash table and the show
 * callback. A zeroed udp_iter_state is allocated, filled with those values
 * plus the start/next/stop callbacks defined in this file, and its seq_ops
 * is handed to seq_open().
 *
 * NOTE(review): the allocation failure check, the handling of the
 * seq_open() result, the use of seq after it is fetched from
 * file->private_data (presumably seq->private is pointed at s, since the
 * iterator helpers read seq->private) and the return statement are missing
 * from this listing.
 */
557 static int udp_seq_open(struct inode *inode, struct file *file)
559 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
560 struct seq_file *seq;
562 struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
566 s->family = afinfo->family;
567 s->hashtable = afinfo->hashtable;
568 s->seq_ops.start = udp_seq_start;
569 s->seq_ops.next = udp_seq_next;
570 s->seq_ops.show = afinfo->seq_show;
571 s->seq_ops.stop = udp_seq_stop;
573 rc = seq_open(file, &s->seq_ops);
577 seq = file->private_data;
586 /* ------------------------------------------------------------------------ */
/*
 * Register the proc file described by @afinfo.
 *
 * Fills in the file operations in afinfo->seq_fops (owner taken from
 * afinfo, open handled by udp_seq_open, the rest by the generic seq_file
 * helpers) and creates the entry afinfo->name in init_net with mode
 * S_IRUGO.
 *
 * NOTE(review): the handling of the proc_net_fops_create() result and the
 * return statement are missing from this listing.
 */
587 int udp_proc_register(struct udp_seq_afinfo *afinfo)
589 struct proc_dir_entry *p;
594 afinfo->seq_fops->owner = afinfo->owner;
595 afinfo->seq_fops->open = udp_seq_open;
596 afinfo->seq_fops->read = seq_read;
597 afinfo->seq_fops->llseek = seq_lseek;
/* seq_release_private is the release hook paired with the open handler. */
598 afinfo->seq_fops->release = seq_release_private;
600 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
/*
 * Undo udp_proc_register(): remove the entry afinfo->name from init_net
 * and zero the whole file operations structure in afinfo->seq_fops.
 *
 * NOTE(review): any guard in front of these two statements is missing from
 * this listing.
 */
608 void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
612 proc_net_remove(&init_net, afinfo->name);
613 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
615 #endif /* CONFIG_PROC_FS */
/*
 * Boot-time initialisation of the UDP memory tunables.
 *
 * limit is computed in three steps:
 *  1. nr_all_pages capped at 1UL << (28 - PAGE_SHIFT), then shifted right
 *     by (20 - PAGE_SHIFT);
 *  2. multiplied by nr_all_pages >> (20 - PAGE_SHIFT) and shifted right by
 *     (PAGE_SHIFT - 11);
 *  3. raised to at least 128.
 * sysctl_udp_mem[] then becomes { 3/4 * limit, limit, 2 * sysctl_udp_mem[0] }
 * (integer arithmetic), and both per-socket minima are set to
 * SK_MEM_QUANTUM.
 *
 * NOTE(review): the declaration of limit is missing from this listing, and
 * nr_all_pages is not defined in the visible part of the file.
 */
617 void __init udp_init(void)
621 /* Set the pressure threshold up by the same strategy of TCP. It is a
622 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
623 * toward zero with the amount of memory, with a floor of 128 pages.
625 limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
626 limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
627 limit = max(limit, 128UL);
628 sysctl_udp_mem[0] = limit / 4 * 3;
629 sysctl_udp_mem[1] = limit;
630 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
632 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
633 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
/*
 * Symbols exported from this file: the hash table and its lock plus the
 * socket-level entry points defined above. The two proc registration
 * helpers are exported only when CONFIG_PROC_FS is defined.
 */
636 EXPORT_SYMBOL(udp_disconnect);
637 EXPORT_SYMBOL(udp_hash);
638 EXPORT_SYMBOL(udp_hash_lock);
639 EXPORT_SYMBOL(udp_ioctl);
640 EXPORT_SYMBOL(udp_get_port);
641 EXPORT_SYMBOL(udp_lib_getsockopt);
642 EXPORT_SYMBOL(udp_lib_setsockopt);
643 EXPORT_SYMBOL(udp_poll);
645 #ifdef CONFIG_PROC_FS
646 EXPORT_SYMBOL(udp_proc_register);
647 EXPORT_SYMBOL(udp_proc_unregister);