X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv6%2Froute.c;h=c2bd74c5f8d979dee714b169750250250f1022e6;hb=974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb;hp=a429900d16aa697999ddf49b9f2de5cc7196e0ec;hpb=50eb431d6e98189eb40606fcd4d03ecd8e168afa;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a429900..c2bd74c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque * - * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -36,10 +34,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -79,7 +79,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(void); +static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); @@ -87,16 +87,18 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex); #endif -static struct dst_ops ip6_dst_ops = { +static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -105,8 +107,8 @@ static struct dst_ops ip6_dst_ops = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, - .entry_size = sizeof(struct rt6_info), + .local_out = __ip6_local_out, + .entries = ATOMIC_INIT(0), }; static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -115,14 +117,14 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), + .entries = ATOMIC_INIT(0), }; -struct rt6_info ip6_null_entry = { +static struct rt6_info ip6_null_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -132,11 +134,10 @@ struct rt6_info ip6_null_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_null_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -146,7 +147,7 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -struct rt6_info ip6_prohibit_entry = { +static struct rt6_info ip6_prohibit_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -156,16 +157,15 @@ struct rt6_info ip6_prohibit_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_prohibit_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; -struct rt6_info ip6_blk_hole_entry = { +static struct rt6_info ip6_blk_hole_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -175,11 +175,10 @@ struct rt6_info ip6_blk_hole_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_blk_hole_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -187,9 +186,9 @@ struct rt6_info ip6_blk_hole_entry = { #endif /* allocate dst with ip6_dst_ops */ -static __inline__ struct rt6_info *ip6_dst_alloc(void) +static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(&ip6_dst_ops); + return (struct rt6_info *)dst_alloc(ops); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -209,7 +208,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; struct net_device *loopback_dev = - dev->nd_net->loopback_dev; + dev_net(dev)->loopback_dev; if (dev != loopback_dev && idev != NULL && idev->dev == dev) { struct inet6_dev *loopback_idev = @@ -230,29 +229,35 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) static inline int rt6_need_strict(struct in6_addr *daddr) { return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); } /* * Route lookup. Any table->tb6_lock is implied. */ -static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, +static inline struct rt6_info *rt6_device_match(struct net *net, + struct rt6_info *rt, + struct in6_addr *saddr, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; - if (oif) { - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + if (!oif && ipv6_addr_any(saddr)) + goto out; + + for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + struct net_device *dev = sprt->rt6i_dev; + + if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -260,14 +265,21 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, } local = sprt; } + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return sprt; } + } + if (oif) { if (local) return local; - if (strict) - return &ip6_null_entry; + if (flags & RT6_LOOKUP_F_IFACE) + return net->ipv6.ip6_null_entry; } +out: return rt; } @@ -407,9 +419,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; + struct net *net; RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", - __FUNCTION__, fn->leaf, oif); + __func__, fn->leaf, oif); rt0 = fn->rr_ptr; if (!rt0) @@ -430,19 +443,21 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) } RT6_TRACE("%s() => %p\n", - __FUNCTION__, match); + __func__, match); - return (match ? match : &ip6_null_entry); + net = dev_net(rt0->rt6i_dev); + return (match ? match : net->ipv6.ip6_null_entry); } #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { + struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -466,15 +481,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, pref = rinfo->route_pref; if (pref == ICMPV6_ROUTER_PREF_INVALID) - pref = ICMPV6_ROUTER_PREF_MEDIUM; + return -EINVAL; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -486,7 +495,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -494,14 +504,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; @@ -513,9 +523,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(saddr) \ +#define BACKTRACK(__net, saddr) \ do { \ - if (rt == &ip6_null_entry) { \ + if (rt == __net->ipv6.ip6_null_entry) { \ struct fib6_node *pn; \ while (1) { \ if (fn->fn_flags & RTN_TL_ROOT) \ @@ -531,7 +541,8 @@ do { \ } \ } while(0) -static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -541,8 +552,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags); - BACKTRACK(&fl->fl6_src); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); + BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -550,8 +561,8 @@ out: } -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, + const struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { .oif = oif, @@ -569,7 +580,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -602,7 +613,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_ins_rt(rt, &info); } @@ -619,6 +630,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad rt = ip6_rt_copy(ort); if (rt) { + struct neighbour *neigh; + int attempts = !in_softirq(); + if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) @@ -638,7 +652,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad } #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + retry: + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { + struct net *net = dev_net(rt->rt6i_dev); + int saved_rt_min_interval = + net->ipv6.sysctl.ip6_rt_gc_min_interval; + int saved_rt_elasticity = + net->ipv6.sysctl.ip6_rt_gc_elasticity; + + if (attempts-- > 0) { + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; + + ip6_dst_gc(&net->ipv6.ip6_dst_ops); + + net->ipv6.sysctl.ip6_rt_gc_elasticity = + saved_rt_elasticity; + net->ipv6.sysctl.ip6_rt_gc_min_interval = + saved_rt_min_interval; + goto retry; + } + + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table overflow.\n"); + dst_free(&rt->u.dst); + return NULL; + } + rt->rt6i_nexthop = neigh; } @@ -658,15 +700,15 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, - struct flowi *fl, int flags) +static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; int strict = 0; int attempts = 3; int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; + int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -678,8 +720,9 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || + + BACKTRACK(net, &fl->fl6_src); + if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -697,7 +740,7 @@ restart: } dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; + rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->u.dst); if (nrt) { @@ -730,15 +773,16 @@ out2: return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -753,19 +797,20 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); + skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); } -static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->oif, fl, flags); } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, + struct flowi *fl) { int flags = 0; @@ -774,8 +819,17 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } - return fib6_rule_lookup(fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); @@ -857,7 +911,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); - rt = (struct rt6_info *) skb->dst; + rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { dst_set_expires(&rt->u.dst, 0); @@ -884,12 +938,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(unsigned int mtu) +static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) - mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; + if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) + mtu = net->ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -902,21 +956,21 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } -static struct dst_entry *ndisc_dst_gc_list; -static DEFINE_SPINLOCK(ndisc_lock); +static struct dst_entry *icmp6_dst_gc_list; +static DEFINE_SPINLOCK(icmp6_dst_lock); -struct dst_entry *ndisc_dst_alloc(struct net_device *dev, +struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - struct in6_addr *addr, - int (*output)(struct sk_buff *)) + const struct in6_addr *addr) { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); + struct net *net = dev_net(dev); if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -925,8 +979,11 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, dev_hold(dev); if (neigh) neigh_hold(neigh); - else + else { neigh = ndisc_get_neigh(dev, addr); + if (IS_ERR(neigh)) + neigh = NULL; + } rt->rt6i_dev = dev; rt->rt6i_idev = idev; @@ -934,8 +991,8 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); - rt->u.dst.output = output; + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); + rt->u.dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST @@ -945,63 +1002,83 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - spin_lock_bh(&ndisc_lock); - rt->u.dst.next = ndisc_dst_gc_list; - ndisc_dst_gc_list = &rt->u.dst; - spin_unlock_bh(&ndisc_lock); + spin_lock_bh(&icmp6_dst_lock); + rt->u.dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->u.dst; + spin_unlock_bh(&icmp6_dst_lock); - fib6_force_start_gc(); + fib6_force_start_gc(net); out: return &rt->u.dst; } -int ndisc_dst_gc(int *more) +int icmp6_dst_gc(void) { struct dst_entry *dst, *next, **pprev; - int freed; + int more = 0; next = NULL; - freed = 0; - spin_lock_bh(&ndisc_lock); - pprev = &ndisc_dst_gc_list; + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; dst_free(dst); - freed++; } else { pprev = &dst->next; - (*more)++; + ++more; } } - spin_unlock_bh(&ndisc_lock); + spin_unlock_bh(&icmp6_dst_lock); - return freed; + return more; } -static int ip6_dst_gc(void) +static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), + void *arg) { - static unsigned expire = 30*HZ; - static unsigned long last_gc; - unsigned long now = jiffies; + struct dst_entry *dst, **pprev; - if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size) - goto out; + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; + while ((dst = *pprev) != NULL) { + struct rt6_info *rt = (struct rt6_info *) dst; + if (func(rt, arg)) { + *pprev = dst->next; + dst_free(dst); + } else { + pprev = &dst->next; + } + } + spin_unlock_bh(&icmp6_dst_lock); +} - expire++; - fib6_run_gc(expire); - last_gc = now; - if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1; +static int ip6_dst_gc(struct dst_ops *ops) +{ + unsigned long now = jiffies; + struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); + int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; + int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; + int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; + int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; + unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + + if (time_after(rt_last_gc + rt_min_interval, now) && + atomic_read(&ops->entries) <= rt_max_size) + goto out; + net->ipv6.ip6_rt_gc_expire++; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + net->ipv6.ip6_rt_last_gc = now; + if (atomic_read(&ops->entries) < ops->gc_thresh) + net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: - expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size); + net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; + return (atomic_read(&ops->entries) > rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1023,15 +1100,17 @@ static int ipv6_get_mtu(struct net_device *dev) return mtu; } -int ipv6_get_hoplimit(struct net_device *dev) +int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = ipv6_devconf.hop_limit; - struct inet6_dev *idev; - - idev = in6_dev_get(dev); - if (idev) { - hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); + int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hoplimit < 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } else + hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; } return hoplimit; } @@ -1043,6 +1122,7 @@ int ipv6_get_hoplimit(struct net_device *dev) int ip6_route_add(struct fib6_config *cfg) { int err; + struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -1057,7 +1137,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(&init_net, cfg->fc_ifindex); + dev = dev_get_by_index(net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1068,13 +1148,13 @@ int ip6_route_add(struct fib6_config *cfg) if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(cfg->fc_table); + table = fib6_new_table(net, cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; } - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1082,7 +1162,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1115,12 +1197,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != init_net.loopback_dev) { + if (dev != net->loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = init_net.loopback_dev; + dev = net->loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1157,7 +1239,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1219,15 +1301,18 @@ install_route: } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!rt->u.dst.metrics[RTAX_MTU-1]) + if (!dst_mtu(&rt->u.dst)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; + + cfg->fc_nlinfo.nl_net = dev_net(dev); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: @@ -1244,8 +1329,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; + struct net *net = dev_net(rt->rt6i_dev); - if (rt == &ip6_null_entry) + if (rt == net->ipv6.ip6_null_entry) return -ENOENT; table = rt->rt6i_table; @@ -1262,7 +1348,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_del_rt(rt, &info); } @@ -1274,7 +1360,7 @@ static int ip6_route_del(struct fib6_config *cfg) struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(cfg->fc_table); + table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); if (table == NULL) return err; @@ -1314,7 +1400,8 @@ struct ip6rd_flowi { struct in6_addr gateway; }; -static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, +static struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { @@ -1357,8 +1444,8 @@ restart: } if (!rt) - rt = &ip6_null_entry; - BACKTRACK(&fl->fl6_src); + rt = net->ipv6.ip6_null_entry; + BACKTRACK(net, &fl->fl6_src); out: dst_hold(&rt->u.dst); @@ -1373,6 +1460,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; + struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1383,13 +1471,15 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, }, }, }, - .gateway = *gateway, }; + ipv6_addr_copy(&rdfl.gateway, gateway); + if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); + return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + flags, __ip6_route_redirect); } void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, @@ -1398,10 +1488,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; + struct net *net = dev_net(neigh->dev); rt = ip6_route_redirect(dest, src, saddr, neigh->dev); - if (rt == &ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); @@ -1446,7 +1537,8 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) goto out; @@ -1474,9 +1566,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; + struct net *net = dev_net(dev); int allfrag = 0; - rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); if (rt == NULL) return; @@ -1509,7 +1602,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1535,7 +1628,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -1550,7 +1643,8 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(ort->rt6i_dev); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1581,14 +1675,15 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_INFO); + table = fib6_get_table(net, RT6_TABLE_INFO); if (table == NULL) return NULL; @@ -1612,17 +1707,21 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref) { struct fib6_config cfg = { .fc_table = RT6_TABLE_INFO, - .fc_metric = 1024, + .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = net, }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1634,7 +1733,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle ip6_route_add(&cfg); - return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); } #endif @@ -1643,7 +1742,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1660,18 +1759,19 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return rt; } -EXPORT_SYMBOL(rt6_get_dflt_router); - struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_table = RT6_TABLE_DFLT, - .fc_metric = 1024, + .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1681,13 +1781,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(void) +void rt6_purge_dflt_routers(struct net *net) { struct rt6_info *rt; struct fib6_table *table; /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(net, RT6_TABLE_DFLT); if (table == NULL) return; @@ -1704,7 +1804,8 @@ restart: read_unlock_bh(&table->tb6_lock); } -static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, +static void rtmsg_to_fib6_config(struct net *net, + struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { memset(cfg, 0, sizeof(*cfg)); @@ -1717,12 +1818,14 @@ static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_nlinfo.nl_net = net; + ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); } -int ipv6_route_ioctl(unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib6_config cfg; struct in6_rtmsg rtmsg; @@ -1738,7 +1841,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) if (err) return -EFAULT; - rtmsg_to_fib6_config(&rtmsg, &cfg); + rtmsg_to_fib6_config(net, &rtmsg, &cfg); rtnl_lock(); switch (cmd) { @@ -1763,19 +1866,22 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { int type; + struct dst_entry *dst = skb_dst(skb); switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + ipstats_mib_noroutes); break; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); @@ -1790,7 +1896,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) static int ip6_pkt_discard_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } @@ -1803,7 +1909,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) static int ip6_pkt_prohibit_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } @@ -1817,21 +1923,23 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(idev->dev); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + struct neighbour *neigh; if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(init_net.loopback_dev); + dev_hold(net->loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = init_net.loopback_dev; + rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; rt->u.dst.obsolete = -1; @@ -1840,34 +1948,55 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (rt->rt6i_nexthop == NULL) { + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { dst_free(&rt->u.dst); - return ERR_PTR(-ENOMEM); + + /* We are casting this because that is the return + * value type. But an errno encoded pointer is the + * same regardless of the underlying pointer type, + * and that's what we are returning. So this is OK. + */ + return (struct rt6_info *) neigh; } + rt->rt6i_nexthop = neigh; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); return rt; } +struct arg_dev_net { + struct net_device *dev; + struct net *net; +}; + static int fib6_ifdown(struct rt6_info *rt, void *arg) { - if (((void*)rt->rt6i_dev == arg || arg == NULL) && - rt != &ip6_null_entry) { + struct net_device *dev = ((struct arg_dev_net *)arg)->dev; + struct net *net = ((struct arg_dev_net *)arg)->net; + + if (((void *)rt->rt6i_dev == dev || dev == NULL) && + rt != net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } return 0; } -void rt6_ifdown(struct net_device *dev) +void rt6_ifdown(struct net *net, struct net_device *dev) { - fib6_clean_all(fib6_ifdown, 0, dev); + struct arg_dev_net adn = { + .dev = dev, + .net = net, + }; + + fib6_clean_all(net, fib6_ifdown, 0, &adn); + icmp6_clean_all(fib6_ifdown, &adn); } struct rt6_mtu_change_arg @@ -1880,6 +2009,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; + struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1907,11 +2037,11 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) */ if (rt->rt6i_dev == arg->dev && !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) > arg->mtu || + (dst_mtu(&rt->u.dst) >= arg->mtu || (dst_mtu(&rt->u.dst) < arg->mtu && dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -1923,7 +2053,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -1960,6 +2090,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2005,13 +2136,9 @@ errout: static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2021,13 +2148,9 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2050,10 +2173,11 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(sizeof(struct rta_cacheinfo)); } -static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_fill_node(struct net *net, + struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, - int prefix, unsigned int flags) + int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -2113,11 +2237,28 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } else if (rtm->rtm_src_len) NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif - if (iif) - NLA_PUT_U32(skb, RTA_IIF, iif); - else if (dst) { + if (iif) { +#ifdef CONFIG_IPV6_MROUTE + if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + int err = ip6mr_get_route(net, skb, rtm, nowait); + if (err <= 0) { + if (!nowait) { + if (err == 0) + return 0; + goto nla_put_failure; + } else { + if (err == -EMSGSIZE) + goto nla_put_failure; + } + } + } else +#endif + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { + struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) + if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2132,7 +2273,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; @@ -2155,14 +2302,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) } else prefix = 0; - return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, + return rt6_fill_node(arg->net, + arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, - prefix, NLM_F_MULTI); + prefix, 0, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2170,9 +2318,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct flowi fl; int err, iif = 0; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; @@ -2202,7 +2347,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(&init_net, iif); + dev = __dev_get_by_index(net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2221,18 +2366,18 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(NULL, &fl); - skb->dst = &rt->u.dst; + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); + skb_dst_set(skb, &rt->u.dst); - err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2240,6 +2385,7 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; + struct net *net = info->nl_net; u32 seq; int err; @@ -2250,19 +2396,40 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0); + err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, + event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, info->pid, - RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); + rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); + return; errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); +} + +static int ip6_route_dev_notify(struct notifier_block *this, + unsigned long event, void *data) +{ + struct net_device *dev = (struct net_device *)data; + struct net *net = dev_net(dev); + + if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); + net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } + + return NOTIFY_OK; } /* @@ -2286,19 +2453,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - seq_printf(m, NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -2311,13 +2475,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int ipv6_route_show(struct seq_file *m, void *v) { - fib6_clean_all(rt6_info_route, 0, m); + struct net *net = (struct net *)m->private; + fib6_clean_all(net, rt6_info_route, 0, m); return 0; } static int ipv6_route_open(struct inode *inode, struct file *file) { - return single_open(file, ipv6_route_show, NULL); + return single_open_net(inode, file, ipv6_route_show); } static const struct file_operations ipv6_route_proc_fops = { @@ -2325,24 +2490,27 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) { + struct net *net = (struct net *)seq->private; seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", - rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, - rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, - rt6_stats.fib_rt_cache, - atomic_read(&ip6_dst_ops.entries), - rt6_stats.fib_discarded_routes); + net->ipv6.rt6_stats->fib_nodes, + net->ipv6.rt6_stats->fib_route_nodes, + net->ipv6.rt6_stats->fib_rt_alloc, + net->ipv6.rt6_stats->fib_rt_entries, + net->ipv6.rt6_stats->fib_rt_cache, + atomic_read(&net->ipv6.ip6_dst_ops.entries), + net->ipv6.rt6_stats->fib_discarded_routes); return 0; } static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, rt6_stats_seq_show, NULL); + return single_open_net(inode, file, rt6_stats_seq_show); } static const struct file_operations rt6_stats_seq_fops = { @@ -2350,54 +2518,21 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; - -static int ipv6_route_proc_init(struct net *net) -{ - int ret = -ENOMEM; - if (!proc_net_fops_create(net, "ipv6_route", - 0, &ipv6_route_proc_fops)) - goto out; - - if (!proc_net_fops_create(net, "rt6_stats", - S_IRUGO, &rt6_stats_seq_fops)) - goto out_ipv6_route; - - ret = 0; -out: - return ret; -out_ipv6_route: - proc_net_remove(net, "ipv6_route"); - goto out; -} - -static void ipv6_route_proc_fini(struct net *net) -{ - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -} -#else -static inline int ipv6_route_proc_init(struct net *net) -{ - return 0; -} -static inline void ipv6_route_proc_fini(struct net *net) -{ - return ; -} #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int delay = init_net.ipv6.sysctl.flush_delay; + struct net *net = current->nsproxy->net_ns; + int delay = net->ipv6.sysctl.flush_delay; if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay); + proc_dointvec(ctl, write, buffer, lenp, ppos); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else return -EINVAL; @@ -2409,88 +2544,72 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { - .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", - .data = &ip6_dst_ops.gc_thresh, + .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, }, - { .ctl_name = 0 } + { } }; struct ctl_table *ipv6_route_sysctl_init(struct net *net) @@ -2500,33 +2619,147 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) table = kmemdup(ipv6_route_table_template, sizeof(ipv6_route_table_template), GFP_KERNEL); + + if (table) { + table[0].data = &net->ipv6.sysctl.flush_delay; + table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; + table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + } + return table; } #endif +static int ip6_route_net_init(struct net *net) +{ + int ret = -ENOMEM; + + memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, + sizeof(net->ipv6.ip6_dst_ops)); + + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, + sizeof(*net->ipv6.ip6_null_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_null_entry) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_null_entry; + net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, + sizeof(*net->ipv6.ip6_prohibit_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_prohibit_entry) + goto out_ip6_null_entry; + net->ipv6.ip6_prohibit_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_prohibit_entry; + net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + + net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, + sizeof(*net->ipv6.ip6_blk_hole_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_blk_hole_entry) + goto out_ip6_prohibit_entry; + net->ipv6.ip6_blk_hole_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; +#endif + + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + net->ipv6.ip6_rt_gc_expire = 30*HZ; + + ret = 0; +out: + return ret; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_ip6_prohibit_entry: + kfree(net->ipv6.ip6_prohibit_entry); +out_ip6_null_entry: + kfree(net->ipv6.ip6_null_entry); +#endif +out_ip6_dst_ops: + goto out; +} + +static void ip6_route_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif + kfree(net->ipv6.ip6_null_entry); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.ip6_prohibit_entry); + kfree(net->ipv6.ip6_blk_hole_entry); +#endif +} + +static struct pernet_operations ip6_route_net_ops = { + .init = ip6_route_net_init, + .exit = ip6_route_net_exit, +}; + +static struct notifier_block ip6_route_dev_notifier = { + .notifier_call = ip6_route_dev_notify, + .priority = 0, +}; + int __init ip6_route_init(void) { int ret; - ip6_dst_ops.kmem_cachep = + ret = -ENOMEM; + ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ip6_dst_ops.kmem_cachep) - return -ENOMEM; - - ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; + if (!ip6_dst_ops_template.kmem_cachep) + goto out; - ret = fib6_init(); + ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) goto out_kmem_cache; - ret = ipv6_route_proc_init(&init_net); + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif + ret = fib6_init(); if (ret) - goto out_fib6_init; + goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_proc_init; + goto out_fib6_init; ret = fib6_rules_init(); if (ret) @@ -2538,7 +2771,10 @@ int __init ip6_route_init(void) __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) goto fib6_rules_init; - ret = 0; + ret = register_netdevice_notifier(&ip6_route_dev_notifier); + if (ret) + goto fib6_rules_init; + out: return ret; @@ -2546,22 +2782,21 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_proc_init: - ipv6_route_proc_fini(&init_net); out_fib6_init: - rt6_ifdown(NULL); fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); out_kmem_cache: - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; } void ip6_route_cleanup(void) { + unregister_netdevice_notifier(&ip6_route_dev_notifier); fib6_rules_cleanup(); - ipv6_route_proc_fini(&init_net); xfrm6_fini(); - rt6_ifdown(NULL); fib6_gc_cleanup(); - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + unregister_pernet_subsys(&ip6_route_net_ops); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); }