X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Fip_fragment.c;h=75347ea70ea071d6fef5223535b6bb83973b602b;hb=ebda37c27d0c768947e9b058332d7ea798210cf8;hp=3b2e5adca8384dd1ec83224329c89d9709475dc4;hpb=12b101555f4a67db67a66966a516075bd477741f;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3b2e5ad..75347ea 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,10 +5,8 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $ - * * Authors: Fred N. van Kempen - * Alan Cox + * Alan Cox * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. @@ -34,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -58,7 +59,7 @@ struct ipfrag_skb_cb int offset; }; -#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb)) +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { @@ -180,7 +181,7 @@ static void ip_evictor(struct net *net) evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); if (evicted) - IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); + IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -189,26 +190,53 @@ static void ip_evictor(struct net *net) static void ip_expire(unsigned long arg) { struct ipq *qp; + struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); - if (qp->q.last_in & COMPLETE) + if (qp->q.last_in & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); - IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { + if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; - /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - dev_put(head->dev); + + rcu_read_lock(); + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out_rcu_unlock; + + /* + * Only search router table for the head fragment, + * when defraging timeout at PRE_ROUTING HOOK. + */ + if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { + const struct iphdr *iph = ip_hdr(head); + int err = ip_route_input(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (unlikely(err)) + goto out_rcu_unlock; + + /* + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; + } + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); +out_rcu_unlock: + rcu_read_unlock(); } out: spin_unlock(&qp->q.lock); @@ -226,6 +254,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); @@ -258,7 +288,10 @@ static inline int ip_frag_too_far(struct ipq *qp) rc = qp->q.fragments && (end - start) > max; if (rc) { - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + struct net *net; + + net = container_of(qp->q.net, struct net, ipv4.frags); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); } return rc; @@ -298,7 +331,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) int ihl, end; int err = -ENOENT; - if (qp->q.last_in & COMPLETE) + if (qp->q.last_in & INET_FRAG_COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && @@ -324,9 +357,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * or have different end, the segment is corrrupted. */ if (end < qp->q.len || - ((qp->q.last_in & LAST_IN) && end != qp->q.len)) + ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) goto err; - qp->q.last_in |= LAST_IN; + qp->q.last_in |= INET_FRAG_LAST_IN; qp->q.len = end; } else { if (end&7) { @@ -336,7 +369,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->q.last_in & LAST_IN) + if (qp->q.last_in & INET_FRAG_LAST_IN) goto err; qp->q.len = end; } @@ -435,9 +468,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->q.meat += skb->len; atomic_add(skb->truesize, &qp->q.net->mem); if (offset == 0) - qp->q.last_in |= FIRST_IN; + qp->q.last_in |= INET_FRAG_FIRST_IN; - if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) + if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + qp->q.meat == qp->q.len) return ip_frag_reasm(qp, prev, dev); write_lock(&ip4_frags.lock); @@ -456,6 +490,7 @@ err: static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev) { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; struct sk_buff *fp, *head = qp->q.fragments; int len; @@ -481,8 +516,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); @@ -499,7 +534,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_shinfo(head)->frag_list) { + if (skb_has_frags(head)) { struct sk_buff *clone; int i, plen = 0; @@ -508,7 +543,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, clone->next = head->next; head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_shinfo(head)->frag_list = NULL; + skb_frag_list_init(head); for (i=0; inr_frags; i++) plen += skb_shinfo(head)->frags[i].size; clone->len = clone->data_len = head->data_len - plen; @@ -541,7 +576,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); - IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; return 0; @@ -552,11 +587,10 @@ out_nomem: goto out_fail; out_oversize: if (net_ratelimit()) - printk(KERN_INFO - "Oversized IP packet from %d.%d.%d.%d.\n", - NIPQUAD(qp->saddr)); + printk(KERN_INFO "Oversized IP packet from %pI4.\n", + &qp->saddr); out_fail: - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; } @@ -566,9 +600,9 @@ int ip_defrag(struct sk_buff *skb, u32 user) struct ipq *qp; struct net *net; - IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); + net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); - net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); @@ -586,7 +620,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) return ret; } - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } @@ -594,68 +628,64 @@ int ip_defrag(struct sk_buff *skb, u32 user) #ifdef CONFIG_SYSCTL static int zero; -static struct ctl_table ip4_frags_ctl_table[] = { +static struct ctl_table ip4_frags_ns_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, }, + { } +}; + +static struct ctl_table ip4_frags_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero }, { } }; -static int ip4_frags_ctl_register(struct net *net) +static int __net_init ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip4_frags_ctl_table; - if (net != &init_net) { - table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + table = ip4_frags_ns_ctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; table[2].data = &net->ipv4.frags.timeout; - table[3].mode &= ~0222; - table[4].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); @@ -666,13 +696,13 @@ static int ip4_frags_ctl_register(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } -static void ip4_frags_ctl_unregister(struct net *net) +static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -680,18 +710,27 @@ static void ip4_frags_ctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } + +static void ip4_frags_ctl_register(void) +{ + register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); +} #else -static inline int ip4_frags_ctl_register(struct net *net) +static inline int ip4_frags_ns_ctl_register(struct net *net) { return 0; } -static inline void ip4_frags_ctl_unregister(struct net *net) +static inline void ip4_frags_ns_ctl_unregister(struct net *net) +{ +} + +static inline void ip4_frags_ctl_register(void) { } #endif -static int ipv4_frags_init_net(struct net *net) +static int __net_init ipv4_frags_init_net(struct net *net) { /* * Fragment cache limits. We will commit 256K at one time. Should we @@ -710,12 +749,12 @@ static int ipv4_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv4.frags); - return ip4_frags_ctl_register(net); + return ip4_frags_ns_ctl_register(net); } -static void ipv4_frags_exit_net(struct net *net) +static void __net_exit ipv4_frags_exit_net(struct net *net) { - ip4_frags_ctl_unregister(net); + ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } @@ -726,6 +765,7 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { + ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init;