#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
const struct sock *sk2))
{
struct sock *sk2;
- struct hlist_node *node;
+ struct hlist_nulls_node *node;
- sk_for_each(sk2, node, &hslot->head)
+ sk_nulls_for_each(sk2, node, &hslot->head)
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
sk2->sk_hash == num &&
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- sk_add_node(sk, &hslot->head);
+ sk_nulls_add_node_rcu(sk, &hslot->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
error = 0;
__be16 sport, __be32 daddr, __be16 dport,
int dif, struct udp_table *udptable)
{
- struct sock *sk, *result = NULL;
- struct hlist_node *node;
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash = udp_hashfn(net, hnum);
struct udp_hslot *hslot = &udptable->hash[hash];
- int score, badness = -1;
+ int score, badness;
- spin_lock(&hslot->lock);
- sk_for_each(sk, node, &hslot->head) {
+ rcu_read_lock();
+begin:
+ result = NULL;
+ badness = -1;
+ sk_nulls_for_each_rcu(sk, node, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
badness = score;
}
}
- if (result)
- sock_hold(result);
- spin_unlock(&hslot->lock);
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != hash)
+ goto begin;
+
+ if (result) {
+ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ result = NULL;
+ else if (unlikely(compute_score(result, net, saddr, hnum, sport,
+ daddr, dport, dif) < badness)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ rcu_read_unlock();
return result;
}
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
- struct hlist_node *node;
+ struct hlist_nulls_node *node;
struct sock *s = sk;
unsigned short hnum = ntohs(loc_port);
- sk_for_each_from(s, node) {
+ sk_nulls_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
- if (s->sk_hash != hnum ||
+ if (!net_eq(sock_net(s), net) ||
+ s->sk_hash != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
.saddr = saddr,
.tos = tos } },
.proto = sk->sk_protocol,
+ .flags = inet_sk_flowi_flags(sk),
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
- sizeof(struct udphdr), &ipc, rt,
+ sizeof(struct udphdr), &ipc, &rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk);
void udp_lib_unhash(struct sock *sk)
{
- struct udp_table *udptable = sk->sk_prot->h.udp_table;
- unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
- struct udp_hslot *hslot = &udptable->hash[hash];
+ if (sk_hashed(sk)) {
+ struct udp_table *udptable = sk->sk_prot->h.udp_table;
+ unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
+ struct udp_hslot *hslot = &udptable->hash[hash];
- spin_lock(&hslot->lock);
- if (sk_del_node_init(sk)) {
- inet_sk(sk)->num = 0;
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ spin_lock_bh(&hslot->lock);
+ if (sk_nulls_del_node_init_rcu(sk)) {
+ inet_sk(sk)->num = 0;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ }
+ spin_unlock_bh(&hslot->lock);
}
- spin_unlock(&hslot->lock);
}
EXPORT_SYMBOL(udp_lib_unhash);
int dif;
spin_lock(&hslot->lock);
- sk = sk_head(&hslot->head);
+ sk = sk_nulls_head(&hslot->head);
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
+ sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
+ daddr, uh->source, saddr,
+ dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
return 0;
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
- NIPQUAD(saddr),
+ &saddr,
ntohs(uh->source),
ulen,
skb->len,
- NIPQUAD(daddr),
+ &daddr,
ntohs(uh->dest));
goto drop;
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
- NIPQUAD(saddr),
+ &saddr,
ntohs(uh->source),
- NIPQUAD(daddr),
+ &daddr,
ntohs(uh->dest),
ulen);
drop:
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
struct net *net = seq_file_net(seq);
for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
- struct hlist_node *node;
+ struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
spin_lock_bh(&hslot->lock);
- sk_for_each(sk, node, &hslot->head) {
+ sk_nulls_for_each(sk, node, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == state->family)
struct net *net = seq_file_net(seq);
do {
- sk = sk_next(sk);
+ sk = sk_nulls_next(sk);
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) {
- spin_unlock(&state->udp_table->hash[state->bucket].lock);
+ spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
return udp_get_first(seq, state->bucket + 1);
}
return sk;
int i;
for (i = 0; i < UDP_HTABLE_SIZE; i++) {
- INIT_HLIST_HEAD(&table->hash[i].head);
+ INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
spin_lock_init(&table->hash[i].lock);
}
}
void __init udp_init(void)
{
- unsigned long limit;
+ unsigned long nr_pages, limit;
udp_table_init(&udp_table);
/* Set the pressure threshold up by the same strategy of TCP. It is a
* fraction of global memory that is up to 1/2 at 256 MB, decreasing
* toward zero with the amount of memory, with a floor of 128 pages.
*/
- limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
- limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ nr_pages = totalram_pages - totalhigh_pages;
+ limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+ limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
limit = max(limit, 128UL);
sysctl_udp_mem[0] = limit / 4 * 3;
sysctl_udp_mem[1] = limit;