X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Fip_fragment.c;h=1f1b82475eaf9cd076eae2c500693f4567f666fc;hb=adf30907d63893e4208dfe3f5c88ae12bc2f25d5;hp=32108cf2a7849eb2f54df6789bcbdcfbe5545e88;hpb=776c729e8d91b2740583a2169678f2d3f383458b;p=safe%2Fjmp%2Flinux-2.6

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 32108cf..1f1b824 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,10 +5,8 @@
 *
 * The IP fragmentation functionality.
 *
- * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
- *
 * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
- * Alan Cox <Alan.Cox@linux.org>
+ * Alan Cox <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 * Alan Cox : Split from ip.c , see ip_input.c for history.
@@ -39,6 +37,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_frag.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -49,20 +48,7 @@
 * as well. Or notify me, at least. --ANK
 */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
-int sysctl_ipfrag_max_dist __read_mostly = 64;
-
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
+static int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 struct ipfrag_skb_cb
 {
@@ -70,160 +56,109 @@ struct ipfrag_skb_cb
 int offset;
 };
 
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb))
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
+
 u32 user;
 __be32 saddr;
 __be32 daddr;
 __be16 id;
 u8 protocol;
- u8 last_in;
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
-
- struct sk_buff *fragments; /* linked list of received fragments */
- int len; /* total length of original datagram */
- int meat;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* when will this queue expire? */
- ktime_t stamp;
 int iif;
 unsigned int rid;
 struct inet_peer *peer;
 };
 
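The refactored struct ipq above replaces the hand-rolled bookkeeping (hash linkage, LRU list, timer, refcount, lock) with an embedded struct inet_frag_queue shared with the IPv6 and conntrack reassemblers: the generic code operates on the embedded member, and the IPv4 code recovers its wrapper with container_of(). A minimal userspace sketch of that pattern (stand-in types, not the kernel's definitions):

```c
#include <stdio.h>
#include <stddef.h>

/* Same pointer arithmetic as the kernel macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct inet_frag_queue { int len; };              /* generic part (stand-in) */
struct ipq { struct inet_frag_queue q; int id; }; /* protocol-private wrapper */

int main(void)
{
	struct ipq ipq = { .q = { .len = 0 }, .id = 42 };
	struct inet_frag_queue *generic = &ipq.q;  /* what the shared code sees */
	struct ipq *back = container_of(generic, struct ipq, q);

	printf("%d\n", back->id);                  /* prints 42 */
	return 0;
}
```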
-/* Hash table. */
-
-#define IPQ_HASHSZ 64
-
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
 
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev);
-
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(struct net *net)
 {
- hlist_del(&qp->list);
- list_del(&qp->lru_list);
- ip_frag_nqueues--;
+ return net->ipv4.frags.nqueues;
 }
 
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(struct net *net)
 {
- write_lock(&ipfrag_lock);
- __ipq_unlink(ipq);
- write_unlock(&ipfrag_lock);
+ return atomic_read(&net->ipv4.frags.mem);
 }
 
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
+struct ip4_create_arg {
+ struct iphdr *iph;
+ u32 user;
+};
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 return jhash_3words((__force u32)id << 16 | prot,
 (__force u32)saddr, (__force u32)daddr,
- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
- unsigned long now = jiffies;
- int i;
-
- write_lock(&ipfrag_lock);
- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
- for (i = 0; i < IPQ_HASHSZ; i++) {
- struct ipq *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
- unsigned int hval = ipqhashfn(q->id, q->saddr,
- q->daddr, q->protocol);
-
- if (hval != i) {
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hlist_add_head(&q->list, &ipq_hash[hval]);
- }
- }
- }
- write_unlock(&ipfrag_lock);
+ struct ipq *ipq;
 
- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+ ipq = container_of(q, struct ipq, q);
+ return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
+static int ip4_frag_match(struct inet_frag_queue *q, void *a)
+{
+ struct ipq *qp;
+ struct ip4_create_arg *arg = a;
+
+ qp = container_of(q, struct ipq, q);
+ return (qp->id == arg->iph->id &&
+ qp->saddr == arg->iph->saddr &&
+ qp->daddr == arg->iph->daddr &&
+ qp->protocol == arg->iph->protocol &&
+ qp->user == arg->user);
+}
 
 /* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
+static __inline__ void frag_kfree_skb(struct netns_frags *nf,
+ struct sk_buff *skb, int *work)
 {
 if (work)
 *work -= skb->truesize;
- atomic_sub(skb->truesize, &ip_frag_mem);
+ atomic_sub(skb->truesize, &nf->mem);
 kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
-{
- if (work)
- *work -= sizeof(struct ipq);
- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
- kfree(qp);
-}
-
-static __inline__ struct ipq *frag_alloc_queue(void)
+static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
-
- if (!qp)
- return NULL;
- atomic_add(sizeof(struct ipq), &ip_frag_mem);
- return qp;
+ struct ipq *qp = container_of(q, struct ipq, q);
+ struct ip4_create_arg *arg = a;
+
+ qp->protocol = arg->iph->protocol;
+ qp->id = arg->iph->id;
+ qp->saddr = arg->iph->saddr;
+ qp->daddr = arg->iph->daddr;
+ qp->user = arg->user;
+ qp->peer = sysctl_ipfrag_max_dist ?
+ inet_getpeer(arg->iph->saddr, 1) : NULL;
 }
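Note the division of labor between the two callbacks just defined: ipqhashfn/ip4_hashfn only pick a bucket (and deliberately ignore the user field), while ip4_frag_match decides true identity, so fragments destined for different consumers (e.g. local delivery vs. netfilter) never share a queue even when they collide in the table. A userspace sketch with a toy mixer standing in for jhash_3words:

```c
#include <stdint.h>
#include <stdio.h>

#define HASHSZ 64   /* stands in for INETFRAGS_HASHSZ */

struct key { uint16_t id; uint32_t saddr, daddr; uint8_t proto; uint32_t user; };

static unsigned bucket(const struct key *k, uint32_t rnd)
{
	/* toy mixer in place of jhash_3words(id << 16 | prot, saddr, daddr, rnd) */
	uint32_t h = ((uint32_t)k->id << 16 | k->proto) ^ k->saddr ^ k->daddr ^ rnd;
	h ^= h >> 16;
	return h & (HASHSZ - 1);   /* "user" plays no part in bucket choice */
}

static int match(const struct key *a, const struct key *b)
{
	return a->id == b->id && a->saddr == b->saddr && a->daddr == b->daddr &&
	       a->proto == b->proto && a->user == b->user;
}

int main(void)
{
	struct key k1 = { 0x1234, 0x0a000001, 0x0a000002, 17, 0 };
	struct key k2 = k1;

	k2.user = 1;   /* same packet tuple, different consumer */
	printf("buckets %u/%u, match %d\n",
	       bucket(&k1, 7), bucket(&k2, 7), match(&k1, &k2));
	return 0;
}
```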
 
-
-/* Destruction primitives. */
-
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
- struct sk_buff *fp;
-
- BUG_TRAP(qp->last_in&COMPLETE);
- BUG_TRAP(del_timer(&qp->timer) == 0);
+ struct ipq *qp;
 
+ qp = container_of(q, struct ipq, q);
 if (qp->peer)
 inet_putpeer(qp->peer);
+}
 
- /* Release all fragment data. */
- fp = qp->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
 
- /* Finally, release the queue descriptor itself. */
- frag_free_queue(qp, work);
-}
 
+/* Destruction primitives. */
 
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
- if (atomic_dec_and_test(&ipq->refcnt))
- ip_frag_destroy(ipq, work);
+ inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -231,48 +166,19 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
 */
 static void ipq_kill(struct ipq *ipq)
 {
- if (del_timer(&ipq->timer))
- atomic_dec(&ipq->refcnt);
-
- if (!(ipq->last_in & COMPLETE)) {
- ipq_unlink(ipq);
- atomic_dec(&ipq->refcnt);
- ipq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments. Evictor trashes the oldest
 * fragment queue until we are back under the threshold.
 */
-static void ip_evictor(void)
+static void ip_evictor(struct net *net)
 {
- struct ipq *qp;
- struct list_head *tmp;
- int work;
-
- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
- if (work <= 0)
- return;
-
- while (work > 0) {
- read_lock(&ipfrag_lock);
- if (list_empty(&ipq_lru_list)) {
- read_unlock(&ipfrag_lock);
- return;
- }
- tmp = ipq_lru_list.next;
- qp = list_entry(tmp, struct ipq, lru_list);
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
-
- spin_lock(&qp->lock);
- if (!(qp->last_in&COMPLETE))
- ipq_kill(qp);
- spin_unlock(&qp->lock);
-
- ipq_put(qp, &work);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- }
+ int evicted;
+
+ evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
+ if (evicted)
+ IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -280,142 +186,62 @@ static void ip_evictor(void)
 */
 static void ip_expire(unsigned long arg)
 {
- struct ipq *qp = (struct ipq *) arg;
+ struct ipq *qp;
+ struct net *net;
 
- spin_lock(&qp->lock);
+ qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ net = container_of(qp->q.net, struct net, ipv4.frags);
 
- if (qp->last_in & COMPLETE)
+ spin_lock(&qp->q.lock);
+
+ if (qp->q.last_in & INET_FRAG_COMPLETE)
 goto out;
 
 ipq_kill(qp);
 
- IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+
+ if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
+ struct sk_buff *head = qp->q.fragments;
 
- if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
- struct sk_buff *head = qp->fragments;
 /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
+ if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
 dev_put(head->dev);
 }
 }
out:
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
 }
 
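ipq_put() and ipq_kill() now defer to the generic inet_frag_put()/inet_frag_kill(), but the lifetime rule is unchanged: the hash table, the pending timer, and every in-flight lookup each hold one reference, and the queue is freed only when the last one drops, which is why ip_expire() above can still touch qp after ipq_kill(). A userspace sketch of the underlying pattern (C11 atomics standing in for the kernel's atomic_t):

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { atomic_int refcnt; };

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {  /* last reference gone */
		printf("destroying\n");
		free(o);
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcnt, 2);  /* e.g. hash table + pending timer */
	obj_put(o);                  /* timer reference dropped: object survives */
	obj_put(o);                  /* table reference dropped: destroyed */
	return 0;
}
```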
-/* Creation primitives. */
-
-static struct ipq *ip_frag_intern(struct ipq *qp_in)
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 {
- struct ipq *qp;
-#ifdef CONFIG_SMP
- struct hlist_node *n;
-#endif
+ struct inet_frag_queue *q;
+ struct ip4_create_arg arg;
 unsigned int hash;
 
- write_lock(&ipfrag_lock);
- hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
- qp_in->protocol);
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * promoted read lock to write lock.
- */
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if (qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
- qp->user == qp_in->user) {
- atomic_inc(&qp->refcnt);
- write_unlock(&ipfrag_lock);
- qp_in->last_in |= COMPLETE;
- ipq_put(qp_in, NULL);
- return qp;
- }
- }
-#endif
- qp = qp_in;
-
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
- atomic_inc(&qp->refcnt);
-
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &ipq_hash[hash]);
- INIT_LIST_HEAD(&qp->lru_list);
- list_add_tail(&qp->lru_list, &ipq_lru_list);
- ip_frag_nqueues++;
- write_unlock(&ipfrag_lock);
- return qp;
-}
+ arg.iph = iph;
+ arg.user = user;
 
-/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
-{
- struct ipq *qp;
+ read_lock(&ip4_frags.lock);
+ hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
- if ((qp = frag_alloc_queue()) == NULL)
+ q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
+ if (q == NULL)
 goto out_nomem;
 
- qp->protocol = iph->protocol;
- qp->last_in = 0;
- qp->id = iph->id;
- qp->saddr = iph->saddr;
- qp->daddr = iph->daddr;
- qp->user = user;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
- qp->iif = 0;
- qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
-
- /* Initialize a timer for this entry. */
- init_timer(&qp->timer);
- qp->timer.data = (unsigned long) qp; /* pointer to queue */
- qp->timer.function = ip_expire; /* expire function */
- spin_lock_init(&qp->lock);
- atomic_set(&qp->refcnt, 1);
-
- return ip_frag_intern(qp);
+ return container_of(q, struct ipq, q);
 
out_nomem:
 LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
 return NULL;
 }
 
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-{
- __be16 id = iph->id;
- __be32 saddr = iph->saddr;
- __be32 daddr = iph->daddr;
- __u8 protocol = iph->protocol;
- unsigned int hash;
- struct ipq *qp;
- struct hlist_node *n;
-
- read_lock(&ipfrag_lock);
- hash = ipqhashfn(id, saddr, daddr, protocol);
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if (qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
- qp->user == user) {
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
- return qp;
- }
- }
- read_unlock(&ipfrag_lock);
-
- return ip_frag_create(iph, user);
-}
-
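The two-step intern/create dance of the old code collapses into one inet_frag_find() call: the same opaque argument (struct ip4_create_arg) is consulted by ip4_frag_match during lookup and by ip4_frag_init when a missing queue must be constructed, which closes the SMP re-check race the old comment worried about. A userspace sketch of that find-or-create contract (hypothetical toy table, not the kernel API):

```c
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int key; };
struct carg { int key; };   /* mirrors ip4_create_arg */

static struct node *find_or_create(struct node **head, void *a,
				   int (*match)(struct node *, void *),
				   void (*ctor)(struct node *, void *))
{
	struct node *n;

	for (n = *head; n; n = n->next)
		if (match(n, a))
			return n;          /* existing entry: reuse it */

	n = calloc(1, sizeof(*n));
	if (!n)
		return NULL;
	ctor(n, a);                        /* constructor sees the same arg */
	n->next = *head;
	*head = n;
	return n;
}

static int match_key(struct node *n, void *a) { return n->key == ((struct carg *)a)->key; }
static void ctor_key(struct node *n, void *a) { n->key = ((struct carg *)a)->key; }

int main(void)
{
	struct node *head = NULL;
	struct carg arg = { .key = 42 };
	struct node *a = find_or_create(&head, &arg, match_key, ctor_key);
	struct node *b = find_or_create(&head, &arg, match_key, ctor_key);

	printf("%d\n", a == b);   /* 1: second call found the first entry */
	return 0;
}
```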
 /* Is the fragment too far ahead to be part of ipq? */
 static inline int ip_frag_too_far(struct ipq *qp)
 {
@@ -432,10 +258,13 @@ static inline int ip_frag_too_far(struct ipq *qp)
 end = atomic_inc_return(&peer->rid);
 qp->rid = end;
 
- rc = qp->fragments && (end - start) > max;
+ rc = qp->q.fragments && (end - start) > max;
 
 if (rc) {
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ struct net *net;
+
+ net = container_of(qp->q.net, struct net, ipv4.frags);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 }
 
 return rc;
@@ -445,22 +274,22 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 struct sk_buff *fp;
 
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
- atomic_inc(&qp->refcnt);
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+ atomic_inc(&qp->q.refcnt);
 return -ETIMEDOUT;
 }
 
- fp = qp->fragments;
+ fp = qp->q.fragments;
 do {
 struct sk_buff *xp = fp->next;
- frag_kfree_skb(fp, NULL);
+ frag_kfree_skb(qp->q.net, fp, NULL);
 fp = xp;
 } while (fp);
 
- qp->last_in = 0;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
+ qp->q.last_in = 0;
+ qp->q.len = 0;
+ qp->q.meat = 0;
+ qp->q.fragments = NULL;
 qp->iif = 0;
 
 return 0;
@@ -475,7 +304,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 int ihl, end;
 int err = -ENOENT;
 
- if (qp->last_in & COMPLETE)
+ if (qp->q.last_in & INET_FRAG_COMPLETE)
 goto err;
 
 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -500,22 +329,22 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 /* If we already have some bits beyond end
 * or have different end, the segment is corrupted.
 */
- if (end < qp->len ||
- ((qp->last_in & LAST_IN) && end != qp->len))
+ if (end < qp->q.len ||
+ ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
 goto err;
- qp->last_in |= LAST_IN;
- qp->len = end;
+ qp->q.last_in |= INET_FRAG_LAST_IN;
+ qp->q.len = end;
 } else {
 if (end&7) {
 end &= ~7;
 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 skb->ip_summed = CHECKSUM_NONE;
 }
- if (end > qp->len) {
+ if (end > qp->q.len) {
 /* Some bits beyond end -> corruption. */
- if (qp->last_in & LAST_IN)
+ if (qp->q.last_in & INET_FRAG_LAST_IN)
 goto err;
- qp->len = end;
+ qp->q.len = end;
 }
 }
 if (end == offset)
@@ -534,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 * this fragment, right?
 */
 prev = NULL;
- for (next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->q.fragments; next != NULL; next = next->next) {
 if (FRAG_CB(next)->offset >= offset)
 break; /* bingo! */
 prev = next;
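For reference, the offset and end values ip_frag_queue() works with come straight from the IP header: the low 13 bits of frag_off count 8-byte units, and IP_MF marks every fragment but the last, which is why only the final fragment may have a length that is not a multiple of 8. A worked example (host byte order assumed for brevity; the kernel applies ntohs first):

```c
#include <stdint.h>
#include <stdio.h>

#define IP_OFFSET 0x1FFF   /* low 13 bits: offset in 8-byte units */
#define IP_MF     0x2000   /* "more fragments" follows */

int main(void)
{
	uint16_t frag_off = 0x2004;                /* example header value */
	int offset = (frag_off & IP_OFFSET) << 3;  /* 4 units * 8 = 32 bytes */
	int mf = !!(frag_off & IP_MF);             /* 1: not the last fragment */

	printf("offset=%d more_fragments=%d\n", offset, mf);
	return 0;
}
```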
@@ -572,7 +401,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 if (!pskb_pull(next, i))
 goto err;
 FRAG_CB(next)->offset += i;
- qp->meat -= i;
+ qp->q.meat -= i;
 if (next->ip_summed != CHECKSUM_UNNECESSARY)
 next->ip_summed = CHECKSUM_NONE;
 break;
@@ -587,10 +416,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 if (prev)
 prev->next = next;
 else
- qp->fragments = next;
+ qp->q.fragments = next;
 
- qp->meat -= free_it->len;
- frag_kfree_skb(free_it, NULL);
+ qp->q.meat -= free_it->len;
+ frag_kfree_skb(qp->q.net, free_it, NULL);
 }
 }
 
@@ -601,25 +430,26 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 if (prev)
 prev->next = skb;
 else
- qp->fragments = skb;
+ qp->q.fragments = skb;
 
 dev = skb->dev;
 if (dev) {
 qp->iif = dev->ifindex;
 skb->dev = NULL;
 }
- qp->stamp = skb->tstamp;
- qp->meat += skb->len;
- atomic_add(skb->truesize, &ip_frag_mem);
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ atomic_add(skb->truesize, &qp->q.net->mem);
 if (offset == 0)
- qp->last_in |= FIRST_IN;
+ qp->q.last_in |= INET_FRAG_FIRST_IN;
 
- if (qp->last_in == (FIRST_IN | LAST_IN) && qp->meat == qp->len)
+ if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ qp->q.meat == qp->q.len)
 return ip_frag_reasm(qp, prev, dev);
 
- write_lock(&ipfrag_lock);
- list_move_tail(&qp->lru_list, &ipq_lru_list);
- write_unlock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
+ list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
+ write_unlock(&ip4_frags.lock);
 
 return -EINPROGRESS;
 
err:
@@ -633,8 +463,9 @@ err:
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 struct net_device *dev)
 {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 struct iphdr *iph;
- struct sk_buff *fp, *head = qp->fragments;
+ struct sk_buff *fp, *head = qp->q.fragments;
 int len;
 int ihlen;
 int err;
@@ -645,33 +476,31 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 if (prev) {
 head = prev->next;
 fp = skb_clone(head, GFP_ATOMIC);
-
 if (!fp)
 goto out_nomem;
 
 fp->next = head->next;
 prev->next = fp;
 
- skb_morph(head, qp->fragments);
- head->next = qp->fragments->next;
+ skb_morph(head, qp->q.fragments);
+ head->next = qp->q.fragments->next;
 
- kfree_skb(qp->fragments);
- qp->fragments = head;
+ kfree_skb(qp->q.fragments);
+ qp->q.fragments = head;
 }
 
- BUG_TRAP(head != NULL);
- BUG_TRAP(FRAG_CB(head)->offset == 0);
+ WARN_ON(head == NULL);
+ WARN_ON(FRAG_CB(head)->offset != 0);
 
 /* Allocate a new buffer for the datagram. */
 ihlen = ip_hdrlen(head);
- len = ihlen + qp->len;
+ len = ihlen + qp->q.len;
 
 err = -E2BIG;
 if (len > 65535)
 goto out_oversize;
 
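The reassembly trigger visible above is the pair of conditions meat == len plus both FIRST_IN and LAST_IN flags: len is pinned by the fragment without MF set, and meat counts payload bytes actually queued, so equality means no holes remain (overlaps having already been trimmed). A toy model with hypothetical fragment sizes:

```c
#include <stdio.h>

struct frag { int offset, len, mf; };

int main(void)
{
	struct frag f[] = { { 0, 32, 1 }, { 32, 32, 1 }, { 64, 36, 0 } };
	int meat = 0, len = 0, first_in = 0, last_in = 0;
	unsigned i;

	for (i = 0; i < sizeof(f) / sizeof(f[0]); i++) {
		meat += f[i].len;                 /* qp->q.meat accounting */
		if (f[i].offset == 0)
			first_in = 1;             /* INET_FRAG_FIRST_IN */
		if (!f[i].mf) {
			last_in = 1;              /* INET_FRAG_LAST_IN */
			len = f[i].offset + f[i].len;
		}
	}
	printf("complete: %d\n", first_in && last_in && meat == len); /* 1 */
	return 0;
}
```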
 /* Head of list must not be cloned. */
- err = -ENOMEM;
 if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 goto out_nomem;
 
@@ -695,12 +524,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 head->len -= clone->len;
 clone->csum = 0;
 clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip_frag_mem);
+ atomic_add(clone->truesize, &qp->q.net->mem);
 }
 
 skb_shinfo(head)->frag_list = head->next;
 skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip_frag_mem);
+ atomic_sub(head->truesize, &qp->q.net->mem);
 
 for (fp=head->next; fp; fp = fp->next) {
 head->data_len += fp->len;
@@ -710,31 +539,31 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 else if (head->ip_summed == CHECKSUM_COMPLETE)
 head->csum = csum_add(head->csum, fp->csum);
 head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip_frag_mem);
+ atomic_sub(fp->truesize, &qp->q.net->mem);
 }
 
 head->next = NULL;
 head->dev = dev;
- head->tstamp = qp->stamp;
+ head->tstamp = qp->q.stamp;
 
 iph = ip_hdr(head);
 iph->frag_off = 0;
 iph->tot_len = htons(len);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
- qp->fragments = NULL;
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
+ qp->q.fragments = NULL;
 return 0;
 
out_nomem:
 LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
 "queue %p\n", qp);
+ err = -ENOMEM;
 goto out_fail;
out_oversize:
 if (net_ratelimit())
- printk(KERN_INFO
- "Oversized IP packet from %d.%d.%d.%d.\n",
- NIPQUAD(qp->saddr));
+ printk(KERN_INFO "Oversized IP packet from %pI4.\n",
+ &qp->saddr);
out_fail:
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
 return err;
 }
 
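The csum_add() folding above works because the Internet checksum is an end-around-carry one's-complement sum, so per-fragment partial sums can be combined in any order (for even-length pieces) and folded once at the end. A userspace sketch of that property:

```c
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Accumulate big-endian 16-bit words; len must be even in this sketch. */
static uint32_t partial(const uint8_t *p, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i < len; i += 2)
		sum += (uint32_t)(p[i] << 8 | p[i + 1]);
	return sum;
}

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);  /* end-around carry */
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t a[] = { 0x45, 0x00, 0x00, 0x54 };
	uint8_t b[] = { 0x12, 0x34, 0xab, 0xcd };
	uint32_t whole = partial(b, sizeof(b), partial(a, sizeof(a), 0));
	uint32_t parts = partial(a, sizeof(a), 0) + partial(b, sizeof(b), 0);

	/* Folding the combined partial sums matches the one-pass result. */
	printf("%04x %04x\n", fold(whole), fold(parts));
	return 0;
}
```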
@@ -742,40 +571,190 @@ out_fail:
 int ip_defrag(struct sk_buff *skb, u32 user)
 {
 struct ipq *qp;
+ struct net *net;
 
- IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+ net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
 /* Start by cleaning up the memory. */
- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
- ip_evictor();
+ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
+ ip_evictor(net);
 
 /* Lookup (or create) queue header */
- if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
+ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 int ret;
 
- spin_lock(&qp->lock);
+ spin_lock(&qp->q.lock);
 
 ret = ip_frag_queue(qp, skb);
 
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
 return ret;
 }
 
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 kfree_skb(skb);
 return -ENOMEM;
 }
 
-void __init ipfrag_init(void)
+#ifdef CONFIG_SYSCTL
+static int zero;
+
+static struct ctl_table ip4_frags_ns_ctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
+ .procname = "ipfrag_high_thresh",
+ .data = &init_net.ipv4.frags.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
+ .procname = "ipfrag_low_thresh",
+ .data = &init_net.ipv4.frags.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_IPFRAG_TIME,
+ .procname = "ipfrag_time",
+ .data = &init_net.ipv4.frags.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies
+ },
+ { }
+};
+
+static struct ctl_table ip4_frags_ctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
+ .procname = "ipfrag_secret_interval",
+ .data = &ip4_frags.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .strategy = sysctl_jiffies
+ },
+ {
+ .procname = "ipfrag_max_dist",
+ .data = &sysctl_ipfrag_max_dist,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero
+ },
+ { }
+};
+
+static int ip4_frags_ns_ctl_register(struct net *net)
+{
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = ip4_frags_ns_ctl_table;
+ if (net != &init_net) {
+ table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data = &net->ipv4.frags.high_thresh;
+ table[1].data = &net->ipv4.frags.low_thresh;
+ table[2].data = &net->ipv4.frags.timeout;
+ }
+
+ hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->ipv4.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (net != &init_net)
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void ip4_frags_ns_ctl_unregister(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ipv4.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.frags_hdr);
+ kfree(table);
+}
+
+static void ip4_frags_ctl_register(void)
+{
+ register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+}
+#else
+static inline int ip4_frags_ns_ctl_register(struct net *net)
+{
+ return 0;
+}
+
+static inline void ip4_frags_ns_ctl_unregister(struct net *net)
+{
+}
+
+static inline void ip4_frags_ctl_register(void)
 {
- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
+}
+#endif
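One subtlety in ip4_frags_ns_ctl_register() above: the static table's .data pointers reference init_net, so every other namespace gets a kmemdup() copy whose first three entries are re-pointed at that namespace's own thresholds and timeout. A userspace sketch of the rebinding idea (toy types, not the kernel's ctl_table):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ns { int high_thresh; };
struct ctl { const char *name; int *data; };

static int init_high = 256 * 1024;  /* init_net's value */
static struct ctl template[] = { { "ipfrag_high_thresh", &init_high } };

int main(void)
{
	struct ns other = { 128 * 1024 };
	struct ctl *table = malloc(sizeof(template));

	if (!table)
		return 1;
	memcpy(table, template, sizeof(template));  /* like kmemdup() */
	table[0].data = &other.high_thresh;         /* rebind to this ns */

	printf("%s: %d vs %d\n", table[0].name,
	       *template[0].data, *table[0].data);  /* 262144 vs 131072 */
	free(table);
	return 0;
}
```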
+ */ + net->ipv4.frags.high_thresh = 256 * 1024; + net->ipv4.frags.low_thresh = 192 * 1024; + /* + * Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival + * by TTL. + */ + net->ipv4.frags.timeout = IP_FRAG_TIME; + + inet_frags_init_net(&net->ipv4.frags); + + return ip4_frags_ns_ctl_register(net); +} + +static void ipv4_frags_exit_net(struct net *net) +{ + ip4_frags_ns_ctl_unregister(net); + inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); +} - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); +static struct pernet_operations ip4_frags_ops = { + .init = ipv4_frags_init_net, + .exit = ipv4_frags_exit_net, +}; + +void __init ipfrag_init(void) +{ + ip4_frags_ctl_register(); + register_pernet_subsys(&ip4_frags_ops); + ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; + ip4_frags.destructor = ip4_frag_free; + ip4_frags.skb_free = NULL; + ip4_frags.qsize = sizeof(struct ipq); + ip4_frags.match = ip4_frag_match; + ip4_frags.frag_expire = ip_expire; + ip4_frags.secret_interval = 10 * 60 * HZ; + inet_frags_init(&ip4_frags); } EXPORT_SYMBOL(ip_defrag);