X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv6%2Froute.c;h=8500156f263720e5042286da5a9109496ed3902d;hb=d5aa407f59f5b83d2c50ec88f5bf56d40f1f8978;hp=15e9a86f28c84e96a1b3597f2397a75e2f88e508;hpb=0dc47877a3de00ceadea0005189656ae8dc52669;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 15e9a86..8500156 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque * - * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -36,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +98,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -108,8 +107,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, - .entry_size = sizeof(struct rt6_info), + .local_out = __ip6_local_out, .entries = ATOMIC_INIT(0), }; @@ -119,11 +117,10 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -140,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -149,7 +147,7 @@ static struct rt6_info ip6_null_entry_template = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -struct rt6_info ip6_prohibit_entry_template = { +static struct rt6_info ip6_prohibit_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -162,6 +160,7 @@ struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -179,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -208,7 +208,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; struct net_device *loopback_dev = - dev->nd_net->loopback_dev; + dev_net(dev)->loopback_dev; if (dev != loopback_dev && idev != NULL && idev->dev == dev) { struct inet6_dev *loopback_idev = @@ -229,7 +229,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) static inline int rt6_need_strict(struct in6_addr *daddr) { return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); } /* @@ -238,21 +238,26 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, + struct in6_addr *saddr, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; - if (oif) { - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + if (!oif && ipv6_addr_any(saddr)) + goto out; + + for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + struct net_device *dev = sprt->rt6i_dev; + + if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -260,14 +265,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } local = sprt; } + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return sprt; } + } + if (oif) { if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } +out: return rt; } @@ -433,7 +445,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) RT6_TRACE("%s() => %p\n", __func__, match); - net = rt0->rt6i_dev->nd_net; + net = dev_net(rt0->rt6i_dev); return (match ? match : net->ipv6.ip6_null_entry); } @@ -441,11 +453,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -469,15 +481,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, pref = rinfo->route_pref; if (pref == ICMPV6_ROUTER_PREF_INVALID) - pref = ICMPV6_ROUTER_PREF_MEDIUM; + return -EINVAL; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -505,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; @@ -546,7 +552,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); @@ -555,8 +561,8 @@ out: } -struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr, - struct in6_addr *saddr, int oif, int strict) +struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, + const struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { .oif = oif, @@ -607,7 +613,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = rt->rt6i_dev->nd_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_ins_rt(rt, &info); } @@ -624,6 +630,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad rt = ip6_rt_copy(ort); if (rt) { + struct neighbour *neigh; + int attempts = !in_softirq(); + if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) @@ -643,7 +652,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad } #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + retry: + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { + struct net *net = dev_net(rt->rt6i_dev); + int saved_rt_min_interval = + net->ipv6.sysctl.ip6_rt_gc_min_interval; + int saved_rt_elasticity = + net->ipv6.sysctl.ip6_rt_gc_elasticity; + + if (attempts-- > 0) { + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; + + ip6_dst_gc(&net->ipv6.ip6_dst_ops); + + net->ipv6.sysctl.ip6_rt_gc_elasticity = + saved_rt_elasticity; + net->ipv6.sysctl.ip6_rt_gc_min_interval = + saved_rt_min_interval; + goto retry; + } + + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table overflow.\n"); + dst_free(&rt->u.dst); + return NULL; + } + rt->rt6i_nexthop = neigh; } @@ -671,7 +708,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; + int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -745,7 +782,7 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -760,10 +797,10 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); + skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, @@ -782,6 +819,15 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } @@ -865,7 +911,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); - rt = (struct rt6_info *) skb->dst; + rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { dst_set_expires(&rt->u.dst, 0); @@ -915,16 +961,16 @@ static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - struct in6_addr *addr) + const struct in6_addr *addr) { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -933,8 +979,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dev_hold(dev); if (neigh) neigh_hold(neigh); - else + else { neigh = ndisc_get_neigh(dev, addr); + if (IS_ERR(neigh)) + neigh = NULL; + } rt->rt6i_dev = dev; rt->rt6i_idev = idev; @@ -964,13 +1013,12 @@ out: return &rt->u.dst; } -int icmp6_dst_gc(int *more) +int icmp6_dst_gc(void) { struct dst_entry *dst, *next, **pprev; - int freed; + int more = 0; next = NULL; - freed = 0; spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; @@ -979,22 +1027,40 @@ int icmp6_dst_gc(int *more) if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; dst_free(dst); - freed++; } else { pprev = &dst->next; - (*more)++; + ++more; } } spin_unlock_bh(&icmp6_dst_lock); - return freed; + return more; +} + +static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), + void *arg) +{ + struct dst_entry *dst, **pprev; + + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; + while ((dst = *pprev) != NULL) { + struct rt6_info *rt = (struct rt6_info *) dst; + if (func(rt, arg)) { + *pprev = dst->next; + dst_free(dst); + } else { + pprev = &dst->next; + } + } + spin_unlock_bh(&icmp6_dst_lock); } static int ip6_dst_gc(struct dst_ops *ops) { unsigned long now = jiffies; - struct net *net = ops->dst_net; + struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; @@ -1034,15 +1100,17 @@ static int ipv6_get_mtu(struct net_device *dev) return mtu; } -int ipv6_get_hoplimit(struct net_device *dev) +int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = ipv6_devconf.hop_limit; - struct inet6_dev *idev; - - idev = in6_dev_get(dev); - if (idev) { - hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); + int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hoplimit < 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } else + hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; } return hoplimit; } @@ -1086,7 +1154,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1094,7 +1162,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1231,17 +1301,17 @@ install_route: } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!rt->u.dst.metrics[RTAX_MTU-1]) + if (!dst_mtu(&rt->u.dst)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) + if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; - cfg->fc_nlinfo.nl_net = dev->nd_net; + cfg->fc_nlinfo.nl_net = dev_net(dev); return __ip6_ins_rt(rt, &cfg->fc_nlinfo); @@ -1259,7 +1329,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; - struct net *net = rt->rt6i_dev->nd_net; + struct net *net = dev_net(rt->rt6i_dev); if (rt == net->ipv6.ip6_null_entry) return -ENOENT; @@ -1278,7 +1348,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = rt->rt6i_dev->nd_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_del_rt(rt, &info); } @@ -1390,7 +1460,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1401,9 +1471,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, }, }, }, - .gateway = *gateway, }; + ipv6_addr_copy(&rdfl.gateway, gateway); + if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; @@ -1417,7 +1488,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; - struct net *net = neigh->dev->nd_net; + struct net *net = dev_net(neigh->dev); rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1466,7 +1537,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net, + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) @@ -1495,7 +1566,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); int allfrag = 0; rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); @@ -1572,8 +1643,8 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct net *net = ort->rt6i_dev->nd_net; - struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct net *net = dev_net(ort->rt6i_dev); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1671,7 +1742,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1688,8 +1759,6 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return rt; } -EXPORT_SYMBOL(rt6_get_dflt_router); - struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) @@ -1702,7 +1771,7 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_nlinfo.pid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev->nd_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1797,19 +1866,22 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { int type; + struct dst_entry *dst = skb_dst(skb); switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + ipstats_mib_noroutes); break; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); @@ -1824,7 +1896,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) static int ip6_pkt_discard_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } @@ -1837,7 +1909,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) static int ip6_pkt_prohibit_out(struct sk_buff *skb) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } @@ -1851,8 +1923,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct net *net = idev->dev->nd_net; - struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct net *net = dev_net(idev->dev); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + struct neighbour *neigh; if (rt == NULL) return ERR_PTR(-ENOMEM); @@ -1875,11 +1948,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (rt->rt6i_nexthop == NULL) { + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { dst_free(&rt->u.dst); - return ERR_PTR(-ENOMEM); + + /* We are casting this because that is the return + * value type. But an errno encoded pointer is the + * same regardless of the underlying pointer type, + * and that's what we are returning. So this is OK. + */ + return (struct rt6_info *) neigh; } + rt->rt6i_nexthop = neigh; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -1916,6 +1996,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) }; fib6_clean_all(net, fib6_ifdown, 0, &adn); + icmp6_clean_all(fib6_ifdown, &adn); } struct rt6_mtu_change_arg @@ -1928,7 +2009,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; - struct net *net = arg->dev->nd_net; + struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1972,7 +2053,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2009,7 +2090,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; - cfg->fc_nlinfo.nl_net = skb->sk->sk_net; + cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2092,10 +2173,11 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(sizeof(struct rta_cacheinfo)); } -static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_fill_node(struct net *net, + struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, - int prefix, unsigned int flags) + int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -2155,12 +2237,28 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } else if (rtm->rtm_src_len) NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif - if (iif) - NLA_PUT_U32(skb, RTA_IIF, iif); - else if (dst) { + if (iif) { +#ifdef CONFIG_IPV6_MROUTE + if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + int err = ip6mr_get_route(net, skb, rtm, nowait); + if (err <= 0) { + if (!nowait) { + if (err == 0) + return 0; + goto nla_put_failure; + } else { + if (err == -EMSGSIZE) + goto nla_put_failure; + } + } + } else +#endif + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { + struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); struct in6_addr saddr_buf; - if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, - dst, &saddr_buf) == 0) + if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2175,7 +2273,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; @@ -2198,14 +2302,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) } else prefix = 0; - return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, + return rt6_fill_node(arg->net, + arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, - prefix, NLM_F_MULTI); + prefix, 0, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2261,12 +2366,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(&init_net, NULL, &fl); - skb->dst = &rt->u.dst; + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); + skb_dst_set(skb, &rt->u.dst); - err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2291,16 +2396,17 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0); + err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, + event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); @@ -2310,7 +2416,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *)data; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { net->ipv6.ip6_null_entry->u.dst.dev = dev; @@ -2347,19 +2453,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - seq_printf(m, NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -2379,18 +2482,7 @@ static int ipv6_route_show(struct seq_file *m, void *v) static int ipv6_route_open(struct inode *inode, struct file *file) { - struct net *net = get_proc_net(inode); - if (!net) - return -ENXIO; - return single_open(file, ipv6_route_show, net); -} - -static int ipv6_route_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, ipv6_route_show); } static const struct file_operations ipv6_route_proc_fops = { @@ -2398,7 +2490,7 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = ipv6_route_release, + .release = single_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) @@ -2410,7 +2502,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) net->ipv6.rt6_stats->fib_rt_alloc, net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, - atomic_read(&net->ipv6.ip6_dst_ops->entries), + atomic_read(&net->ipv6.ip6_dst_ops.entries), net->ipv6.rt6_stats->fib_discarded_routes); return 0; @@ -2418,16 +2510,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - struct net *net = get_proc_net(inode); - return single_open(file, rt6_stats_seq_show, net); -} - -static int rt6_stats_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = (struct net *)seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, rt6_stats_seq_show); } static const struct file_operations rt6_stats_seq_fops = { @@ -2435,20 +2518,20 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = rt6_stats_seq_release, + .release = single_release_net, }; #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; int delay = net->ipv6.sysctl.flush_delay; if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(ctl, write, buffer, lenp, ppos); fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else @@ -2461,91 +2544,75 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { - .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, }, - { .ctl_name = 0 } + { } }; -struct ctl_table *ipv6_route_sysctl_init(struct net *net) +struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) { struct ctl_table *table; @@ -2555,7 +2622,7 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.flush_delay; - table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; + table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; @@ -2563,23 +2630,19 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; } return table; } #endif -static int ip6_route_net_init(struct net *net) +static int __net_init ip6_route_net_init(struct net *net) { - int ret = 0; + int ret = -ENOMEM; - ret = -ENOMEM; - net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, - sizeof(*net->ipv6.ip6_dst_ops), - GFP_KERNEL); - if (!net->ipv6.ip6_dst_ops) - goto out; - net->ipv6.ip6_dst_ops->dst_net = net; + memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, + sizeof(net->ipv6.ip6_dst_ops)); net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), @@ -2588,33 +2651,37 @@ static int ip6_route_net_init(struct net *net) goto out_ip6_dst_ops; net->ipv6.ip6_null_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); - if (!net->ipv6.ip6_prohibit_entry) { - kfree(net->ipv6.ip6_null_entry); - goto out; - } + if (!net->ipv6.ip6_prohibit_entry) + goto out_ip6_null_entry; net->ipv6.ip6_prohibit_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); - if (!net->ipv6.ip6_blk_hole_entry) { - kfree(net->ipv6.ip6_null_entry); - kfree(net->ipv6.ip6_prohibit_entry); - goto out; - } + if (!net->ipv6.ip6_blk_hole_entry) + goto out_ip6_prohibit_entry; net->ipv6.ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; #endif + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + #ifdef CONFIG_PROC_FS proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); @@ -2625,12 +2692,17 @@ static int ip6_route_net_init(struct net *net) out: return ret; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_ip6_prohibit_entry: + kfree(net->ipv6.ip6_prohibit_entry); +out_ip6_null_entry: + kfree(net->ipv6.ip6_null_entry); +#endif out_ip6_dst_ops: - kfree(net->ipv6.ip6_dst_ops); goto out; } -static void ip6_route_net_exit(struct net *net) +static void __net_exit ip6_route_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS proc_net_remove(net, "ipv6_route"); @@ -2641,7 +2713,6 @@ static void ip6_route_net_exit(struct net *net) kfree(net->ipv6.ip6_prohibit_entry); kfree(net->ipv6.ip6_blk_hole_entry); #endif - kfree(net->ipv6.ip6_dst_ops); } static struct pernet_operations ip6_route_net_ops = { @@ -2663,12 +2734,14 @@ int __init ip6_route_init(void) kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ip6_dst_ops_template.kmem_cachep) - goto out;; + goto out; ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) goto out_kmem_cache; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */