2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
/*
 * Debug/trace macros and a build-time switch.
 * Two alternative definitions of RT6_TRACE appear below: one forwards to
 * printk at KERN_DEBUG, the other expands to an empty statement.  The
 * preprocessor conditionals that select between them are not visible in
 * this listing.  CLONE_OFFLINK_ROUTE is defined as 0 here.
 */
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 #define CLONE_OFFLINK_ROUTE 0
/*
 * Forward declarations for file-local helpers and dst_ops callbacks.
 * The last two prototypes are only declared when CONFIG_IPV6_ROUTE_INFO is
 * set.  NOTE(review): the final parameter line of the rt6_add_route_info
 * prototype and the closing of the conditional are not visible in this
 * listing.
 */
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
/*
 * dst operations table for IPv6 routes.
 * Protocol is ETH_P_IPV6; check/destroy/ifdown/negative_advice/
 * link_failure/update_pmtu/local_out are wired to the handlers named below;
 * each entry is sizeof(struct rt6_info) and the entry counter starts at 0.
 * NOTE(review): some initializer lines are not visible in this listing.
 */
100 static struct dst_ops ip6_dst_ops = {
102 .protocol = __constant_htons(ETH_P_IPV6),
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = ip6_local_out,
112 .entry_size = sizeof(struct rt6_info),
113 .entries = ATOMIC_INIT(0),
/*
 * update_pmtu callback used by ip6_dst_blackhole_ops.
 * NOTE(review): only the signature is visible in this listing; the body is
 * not shown, so its behaviour cannot be described from here.
 */
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
/*
 * dst operations table for blackhole IPv6 routes (the entries allocated by
 * ip6_dst_blackhole()).  It reuses ip6_dst_destroy and ip6_dst_check and
 * installs ip6_rt_blackhole_update_pmtu as the PMTU handler.
 */
120 static struct dst_ops ip6_dst_blackhole_ops = {
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
127 .entries = ATOMIC_INIT(0),
/*
 * Template for the "null" route entry.
 * It starts with one dst reference and one rt6i_ref, reports -ENETUNREACH,
 * carries a hop-limit metric of 255, discards packets through
 * ip6_pkt_discard / ip6_pkt_discard_out, is flagged
 * RTF_REJECT | RTF_NONEXTHOP and uses the largest u32 as its metric.
 * NOTE(review): the nested initializer braces are not visible here.
 */
130 static struct rt6_info ip6_null_entry_template = {
133 .__refcnt = ATOMIC_INIT(1),
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Prototypes for the input/output handlers used by the prohibit route
 * template; declared only when CONFIG_IPV6_MULTIPLE_TABLES is set.
 */
148 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
150 static int ip6_pkt_prohibit(struct sk_buff *skb);
151 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/*
 * Template for the "prohibit" route entry.
 * Same shape as the null entry template, but packets are handed to
 * ip6_pkt_prohibit / ip6_pkt_prohibit_out.
 * NOTE(review): the error code line is not visible in this listing.
 */
153 struct rt6_info ip6_prohibit_entry_template = {
156 .__refcnt = ATOMIC_INIT(1),
160 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
161 .input = ip6_pkt_prohibit,
162 .output = ip6_pkt_prohibit_out,
166 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
167 .rt6i_metric = ~(u32) 0,
168 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Template for the "blackhole" route entry.
 * Both input and output use dst_discard; the remaining visible fields match
 * the other reject templates (one reference, hop-limit metric 255,
 * RTF_REJECT | RTF_NONEXTHOP, largest u32 metric).
 * NOTE(review): the error code line is not visible in this listing.
 */
171 static struct rt6_info ip6_blk_hole_entry_template = {
174 .__refcnt = ATOMIC_INIT(1),
178 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
179 .input = dst_discard,
180 .output = dst_discard,
184 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
185 .rt6i_metric = ~(u32) 0,
186 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Thin wrapper: allocates a dst entry from ip6_dst_ops via dst_alloc() and
 * returns it cast to struct rt6_info *.  The result is passed through
 * unchecked, so callers test it for NULL themselves.
 */
191 /* allocate dst with ip6_dst_ops */
192 static __inline__ struct rt6_info *ip6_dst_alloc(void)
194 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops destroy callback for IPv6 routes.
 * Visible lines: saves rt->rt6i_idev in a local and clears the field.
 * NOTE(review): presumably the saved inet6_dev reference is dropped in
 * lines that are not visible in this listing -- confirm.
 */
197 static void ip6_dst_destroy(struct dst_entry *dst)
199 struct rt6_info *rt = (struct rt6_info *)dst;
200 struct inet6_dev *idev = rt->rt6i_idev;
203 rt->rt6i_idev = NULL;
/*
 * dst_ops ifdown callback for IPv6 routes.
 * When @dev is not the namespace's loopback device and the route's
 * inet6_dev belongs to @dev, takes a reference on the loopback inet6_dev
 * and, if one was obtained, re-points rt->rt6i_idev at it.
 * NOTE(review): the release of the previous inet6_dev is presumably in
 * lines not visible here -- confirm.
 */
208 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
211 struct rt6_info *rt = (struct rt6_info *)dst;
212 struct inet6_dev *idev = rt->rt6i_idev;
213 struct net_device *loopback_dev =
214 dev->nd_net->loopback_dev;
216 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
217 struct inet6_dev *loopback_idev =
218 in6_dev_get(loopback_dev);
219 if (loopback_idev != NULL) {
220 rt->rt6i_idev = loopback_idev;
/*
 * Returns non-zero when @rt carries RTF_EXPIRES and the current jiffies
 * value is already past rt->rt6i_expires; zero otherwise.
 */
226 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
228 return (rt->rt6i_flags & RTF_EXPIRES &&
229 time_after(jiffies, rt->rt6i_expires));
/*
 * Returns non-zero when @daddr is a multicast or link-local address.
 * Callers in this file use the result to add RT6_LOOKUP_F_IFACE to their
 * lookup flags.
 */
232 static inline int rt6_need_strict(struct in6_addr *daddr)
234 return (ipv6_addr_type(daddr) &
235 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239 * Route lookup. Any table->tb6_lock is implied.
/*
 * Picks, from the sibling list starting at the given route, an entry whose
 * device matches the requested outgoing interface index (oif).
 * Visible logic: walks the rt6_next chain; compares each device's ifindex
 * with oif; for loopback devices inspects the route's inet6_dev to decide
 * whether it can serve as a "local" fallback; one visible path returns the
 * namespace's ip6_null_entry.
 * NOTE(review): most parameters, the return statements and the branch
 * bodies are not visible in this listing.
 */
242 static inline struct rt6_info *rt6_device_match(struct net *net,
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (local && (!oif ||
261 local->rt6i_idev->dev->ifindex == oif))
272 return net->ipv6.ip6_null_entry;
/*
 * Router reachability probe (built when CONFIG_IPV6_ROUTER_PREF is set).
 * @rt may be NULL.  Nothing is sent when the route has no nexthop
 * neighbour or the neighbour is already in a NUD_VALID state.  Otherwise,
 * with neigh->lock read-held, the state is re-checked and, if more than
 * the interface's rtr_probe_interval has passed since neigh->updated, the
 * timestamp is refreshed and a neighbour solicitation for the neighbour's
 * primary key is sent to its solicited-node multicast address.
 * NOTE(review): in the visible lines neigh->updated is written while only
 * the read side of neigh->lock is held -- confirm this is intentional.
 */
277 #ifdef CONFIG_IPV6_ROUTER_PREF
278 static void rt6_probe(struct rt6_info *rt)
280 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
282 * Okay, this does not seem to be appropriate
283 * for now, however, we need to check if it
284 * is really so; aka Router Reachability Probing.
286 * Router Reachability Probe MUST be rate-limited
287 * to no more than one per minute.
289 if (!neigh || (neigh->nud_state & NUD_VALID))
291 read_lock_bh(&neigh->lock);
292 if (!(neigh->nud_state & NUD_VALID) &&
293 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
294 struct in6_addr mcaddr;
295 struct in6_addr *target;
297 neigh->updated = jiffies;
298 read_unlock_bh(&neigh->lock);
300 target = (struct in6_addr *)&neigh->primary_key;
301 addrconf_addr_solict_mult(target, &mcaddr);
302 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
304 read_unlock_bh(&neigh->lock);
/*
 * Alternative inline definition of rt6_probe().
 * NOTE(review): presumably the no-op variant for builds without
 * CONFIG_IPV6_ROUTER_PREF; neither the preprocessor branch nor the body is
 * visible in this listing.
 */
307 static inline void rt6_probe(struct rt6_info *rt)
314 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Rates how well @rt's device matches the requested interface @oif.
 * First case: no interface requested (oif == 0) or the device's ifindex
 * equals oif.  Second case: the device is a loopback device and the
 * route's inet6_dev belongs to the interface with index oif.
 * NOTE(review): the value returned for each case is not visible here.
 */
316 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
318 struct net_device *dev = rt->rt6i_dev;
319 if (!oif || dev->ifindex == oif)
321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Rates the reachability of @rt's nexthop neighbour.
 * Routes flagged RTF_NONEXTHOP, or not flagged RTF_GATEWAY, are handled
 * first without consulting the neighbour.  Otherwise the neighbour state is
 * read under neigh->lock: NUD_VALID is one case and, with
 * CONFIG_IPV6_ROUTER_PREF, NUD_FAILED is another.
 * NOTE(review): the resulting values, and any NULL check on the neighbour,
 * are not visible in this listing.
 */
327 static inline int rt6_check_neigh(struct rt6_info *rt)
329 struct neighbour *neigh = rt->rt6i_nexthop;
331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
338 #ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
344 read_unlock_bh(&neigh->lock);
/*
 * Computes a selection score for @rt.
 * Starts from the device match (rt6_check_dev); a zero device match with
 * RT6_LOOKUP_F_IFACE set is a special case.  With CONFIG_IPV6_ROUTER_PREF
 * the decoded router preference is OR-ed in, shifted left by two bits.
 * The neighbour check (rt6_check_neigh) follows; a zero result with
 * RT6_LOOKUP_F_REACHABLE set is a second special case.
 * NOTE(review): the values returned in the special cases and at the end
 * are not visible in this listing.
 */
350 static int rt6_score_route(struct rt6_info *rt, int oif,
355 m = rt6_check_dev(rt, oif);
356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
358 #ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 n = rt6_check_neigh(rt);
362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * Considers @rt as a candidate against the current best @match, whose
 * score is tracked through @mpri.  Expired routes are checked for first;
 * the remaining routes are scored with rt6_score_route().  Two visible
 * branches depend on RT6_LOOKUP_F_REACHABLE.
 * NOTE(review): the comparison against *mpri, the branch bodies and the
 * return value are not visible in this listing.
 */
367 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
372 if (rt6_check_expired(rt))
375 m = rt6_score_route(rt, oif, strict);
380 if (strict & RT6_LOOKUP_F_REACHABLE)
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * Scans the routes of node @fn that share @metric, in round-robin order:
 * first from @rr_head to the end of the equal-metric run, then from
 * fn->leaf up to (not including) @rr_head.  Every route visited is passed
 * through find_match(), which carries the best candidate along.
 * NOTE(review): the initialisation of match/mpri and the return statement
 * are not visible in this listing.
 */
392 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
396 struct rt6_info *rt, *match;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
/*
 * Selects the route to use from node @fn for interface @oif.
 * The round-robin pointer fn->rr_ptr is set to fn->leaf on one visible
 * path (presumably when it was unset -- the condition is not visible).
 * find_rr_leaf() then searches the routes sharing rt0's metric.  A visible
 * branch tied to RT6_LOOKUP_F_REACHABLE examines the route after rt0 and
 * whether it shares rt0's metric (round-robin advance).
 * Returns the match, or the namespace's ip6_null_entry when none exists.
 */
410 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 struct rt6_info *match, *rt0;
415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
420 fn->rr_ptr = rt0 = fn->leaf;
422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
428 /* no entries matched; do round-robin */
429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
439 net = rt0->rt6i_dev->nd_net;
440 return (match ? match : net->ipv6.ip6_null_entry);
/*
 * Handles a route information option received on @dev from the router
 * @gwaddr (built when CONFIG_IPV6_ROUTE_INFO is set).
 * @opt is read as a struct route_info of @len bytes.
 * Visible steps:
 *  - checks that @len covers the struct and that rinfo->length is
 *    consistent with rinfo->prefix_len;
 *  - reads the route preference, replacing ICMPV6_ROUTER_PREF_INVALID with
 *    ICMPV6_ROUTER_PREF_MEDIUM;
 *  - converts the lifetime from network order and clamps finite values so
 *    that HZ * lifetime cannot overflow;
 *  - uses the option's prefix directly when length is 3, otherwise a
 *    masked copy produced by ipv6_addr_prefix();
 *  - looks up an existing route with rt6_get_route_info() and adds one
 *    with rt6_add_route_info();
 *  - stores RTF_ROUTEINFO and the preference in the route flags, clears
 *    RTF_EXPIRES for an infinite lifetime (0xffffffff) or sets the expiry
 *    otherwise, then drops the route reference.
 * NOTE(review): the error returns and the handling of "rt && !lifetime"
 * are not visible in this listing.
 */
443 #ifdef CONFIG_IPV6_ROUTE_INFO
444 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
447 struct net *net = dev->nd_net;
448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
454 if (len < sizeof(struct route_info)) {
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
461 } else if (rinfo->prefix_len > 128) {
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
477 lifetime = ntohl(rinfo->lifetime);
478 if (lifetime == 0xffffffff) {
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
492 prefix = &prefix_buf;
495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
498 if (rt && !lifetime) {
504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
517 dst_release(&rt->u.dst);
/*
 * BACKTRACK(__net, saddr): lookup helper macro.
 * It relies on locals named rt and fn in the expanding function.  When rt
 * is the namespace's null entry it inspects the tree around fn: it stops
 * at an RTN_TL_ROOT node, may descend into a parent's source subtree with
 * fib6_lookup(), and tests nodes for RTN_RTINFO.
 * NOTE(review): the loop structure and the goto targets are not visible in
 * this listing.
 */
523 #define BACKTRACK(__net, saddr) \
525 if (rt == __net->ipv6.ip6_null_entry) { \
526 struct fib6_node *pn; \
528 if (fn->fn_flags & RTN_TL_ROOT) \
531 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
532 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
535 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Per-table lookup callback used by rt6_lookup().
 * Under the table read lock: finds the node for the flow's destination and
 * source, narrows the candidates with rt6_device_match() using fl->oif,
 * and backtracks when no route matched.  The result is marked as used
 * (dst_use) before the lock is released.
 * NOTE(review): the assignment of rt from the node and the return
 * statement are not visible in this listing.
 */
541 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
543 struct flowi *fl, int flags)
545 struct fib6_node *fn;
548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552 rt = rt6_device_match(net, rt, fl->oif, flags);
553 BACKTRACK(net, &fl->fl6_src);
555 dst_use(&rt->u.dst, jiffies);
556 read_unlock_bh(&table->tb6_lock);
/*
 * Exported route lookup for @daddr, optionally from @saddr, via interface
 * @oif.  A non-zero @strict sets RT6_LOOKUP_F_IFACE.  On the visible path
 * that uses @saddr, the address is copied into the flow and
 * RT6_LOOKUP_F_HAS_SADDR is set.  The lookup runs through
 * fib6_rule_lookup() with ip6_pol_route_lookup as the per-table callback,
 * and the visible return casts the resulting dst to struct rt6_info *.
 * NOTE(review): the flow initialiser, the saddr condition and any other
 * return path are not visible in this listing.
 */
561 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
562 struct in6_addr *saddr, int oif, int strict)
572 struct dst_entry *dst;
573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
582 return (struct rt6_info *) dst;
589 EXPORT_SYMBOL(rt6_lookup);
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592 It takes a new route entry; if the addition fails for any reason, the
593 route is freed. In any case, if the caller does not hold it, it may be destroyed. */
/*
 * Inserts @rt into its own table (rt->rt6i_table) with fib6_add(), holding
 * the table write lock for the duration of the call.  @info is passed to
 * fib6_add() unchanged.
 * NOTE(review): the return statement is not visible; presumably the
 * fib6_add() result is returned -- confirm.
 */
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
600 struct fib6_table *table;
602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
604 err = fib6_add(&table->tb6_root, rt, info);
605 write_unlock_bh(&table->tb6_lock);
/*
 * Convenience wrapper around __ip6_ins_rt(): builds an nl_info whose
 * network namespace is that of the route's device and returns the result
 * of the insertion.
 */
610 int ip6_ins_rt(struct rt6_info *rt)
612 struct nl_info info = {
613 .nl_net = rt->rt6i_dev->nd_net,
615 return __ip6_ins_rt(rt, &info);
/*
 * Builds a host-route copy of @ort for destination @daddr.
 * The copy comes from ip6_rt_copy().  For non-gateway routes the
 * destination itself becomes the gateway, and the copy is flagged
 * RTF_ANYCAST when the original prefix is shorter than /128 and its prefix
 * address equals @daddr.  The copy is then turned into a /128 cache entry
 * (RTF_CACHE, DST_HOST); with CONFIG_IPV6_SUBTREES a source-specific
 * original also gets @saddr as a /128 source key.  Finally the nexthop
 * neighbour is resolved with ndisc_get_neigh().
 * NOTE(review): the NULL check after ip6_rt_copy(), the handling of a
 * failed neighbour lookup and the return are not visible in this listing.
 */
618 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
627 rt = ip6_rt_copy(ort);
630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
640 rt->u.dst.flags |= DST_HOST;
642 #ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * Builds a /128 cache copy of @ort for @daddr that shares the original's
 * nexthop neighbour: the copy is flagged RTF_CACHE and DST_HOST, and the
 * neighbour is taken with neigh_clone().
 * NOTE(review): the NULL check on the copy and the return statement are
 * not visible in this listing.
 */
656 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
658 struct rt6_info *rt = ip6_rt_copy(ort);
660 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661 rt->rt6i_dst.plen = 128;
662 rt->rt6i_flags |= RTF_CACHE;
663 rt->u.dst.flags |= DST_HOST;
664 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Core per-table route resolution shared by the input and output paths.
 * Visible steps:
 *  - RT6_LOOKUP_F_REACHABLE is requested only when ipv6_devconf.forwarding
 *    is zero; RT6_LOOKUP_F_IFACE is carried over from @flags;
 *  - under the table read lock, the node for the flow is found and
 *    rt6_select() picks a route, with BACKTRACK on a null result;
 *  - the null entry and RTF_CACHE routes take a separate (not visible)
 *    path;
 *  - otherwise the route is held, the lock dropped, and a per-destination
 *    entry is created: rt6_alloc_cow() when the route has no nexthop and
 *    is not RTF_NONEXTHOP, else rt6_alloc_clone() when CLONE_OFFLINK_ROUTE
 *    is enabled;
 *  - the new entry is inserted with ip6_ins_rt(); the in-code comment
 *    notes that a concurrent insert can race here and triggers a relookup;
 *  - lastuse is stamped with jiffies before returning.
 * NOTE(review): labels, gotos, retry limits and the return statement are
 * not visible in this listing.
 */
669 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
670 struct flowi *fl, int flags)
672 struct fib6_node *fn;
673 struct rt6_info *rt, *nrt;
677 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
679 strict |= flags & RT6_LOOKUP_F_IFACE;
682 read_lock_bh(&table->tb6_lock);
685 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
688 rt = rt6_select(fn, oif, strict | reachable);
690 BACKTRACK(net, &fl->fl6_src);
691 if (rt == net->ipv6.ip6_null_entry ||
692 rt->rt6i_flags & RTF_CACHE)
695 dst_hold(&rt->u.dst);
696 read_unlock_bh(&table->tb6_lock);
698 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
699 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
701 #if CLONE_OFFLINK_ROUTE
702 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
708 dst_release(&rt->u.dst);
709 rt = nrt ? : net->ipv6.ip6_null_entry;
711 dst_hold(&rt->u.dst);
713 err = ip6_ins_rt(nrt);
722 * Race condition! In the gap, when table->tb6_lock was
723 * released someone could insert this route. Relookup.
725 dst_release(&rt->u.dst);
733 dst_hold(&rt->u.dst);
734 read_unlock_bh(&table->tb6_lock);
736 rt->u.dst.lastuse = jiffies;
/*
 * Input-path adapter for fib6_rule_lookup(): calls ip6_pol_route() with
 * the flow's incoming interface index (fl->iif) as the interface argument.
 */
742 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
743 struct flowi *fl, int flags)
745 return ip6_pol_route(net, table, fl->iif, fl, flags);
/*
 * Routes a received packet.
 * Builds a flow from the packet's IPv6 header (incoming interface, flow
 * label masked with IPV6_FLOWINFO_MASK, next-header protocol; the address
 * fields are not visible in this listing).  RT6_LOOKUP_F_HAS_SADDR is
 * always set, and RT6_LOOKUP_F_IFACE is added when the destination is
 * multicast or link-local.  The lookup result is stored in skb->dst.
 */
748 void ip6_route_input(struct sk_buff *skb)
750 struct ipv6hdr *iph = ipv6_hdr(skb);
751 struct net *net = skb->dev->nd_net;
752 int flags = RT6_LOOKUP_F_HAS_SADDR;
754 .iif = skb->dev->ifindex,
759 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
763 .proto = iph->nexthdr,
766 if (rt6_need_strict(&iph->daddr))
767 flags |= RT6_LOOKUP_F_IFACE;
769 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
/*
 * Output-path adapter for fib6_rule_lookup(): calls ip6_pol_route() with
 * the flow's outgoing interface index (fl->oif) as the interface argument.
 */
772 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
773 struct flowi *fl, int flags)
775 return ip6_pol_route(net, table, fl->oif, fl, flags);
/*
 * Exported output route lookup for flow @fl.
 * RT6_LOOKUP_F_IFACE is added for multicast or link-local destinations,
 * and RT6_LOOKUP_F_HAS_SADDR when the flow has a specified source address.
 * NOTE(review): the lookup is always made in init_net and @sk is not used
 * in the visible lines -- confirm this is the intended namespace handling.
 */
778 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
782 if (rt6_need_strict(&fl->fl6_dst))
783 flags |= RT6_LOOKUP_F_IFACE;
785 if (!ipv6_addr_any(&fl->fl6_src))
786 flags |= RT6_LOOKUP_F_HAS_SADDR;
788 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
791 EXPORT_SYMBOL(ip6_route_output);
/*
 * Builds a blackhole dst modelled on the route in *@dstp.
 * The new entry is allocated from ip6_dst_blackhole_ops and starts with
 * one reference.  Both input and output are dst_discard.  Metrics, device,
 * inet6_dev (with a reference taken), gateway, flags (minus RTF_EXPIRES)
 * and the destination key are copied from the original; the source key is
 * copied too under CONFIG_IPV6_SUBTREES.
 * Returns 0 when a new entry was produced, -ENOMEM otherwise.
 * NOTE(review): the allocation check, the device reference and the update
 * of *dstp are not visible in this listing.
 */
793 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795 struct rt6_info *ort = (struct rt6_info *) *dstp;
796 struct rt6_info *rt = (struct rt6_info *)
797 dst_alloc(&ip6_dst_blackhole_ops);
798 struct dst_entry *new = NULL;
803 atomic_set(&new->__refcnt, 1);
805 new->input = dst_discard;
806 new->output = dst_discard;
808 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
809 new->dev = ort->u.dst.dev;
812 rt->rt6i_idev = ort->rt6i_idev;
814 in6_dev_hold(rt->rt6i_idev);
815 rt->rt6i_expires = 0;
817 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
818 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
821 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
822 #ifdef CONFIG_IPV6_SUBTREES
823 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
831 return (new ? 0 : -ENOMEM);
833 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
836 * Destination cache support functions
/*
 * dst_ops check callback.
 * The entry counts as current when it is still attached to a fib6 node
 * whose serial number (fn_sernum) equals @cookie.
 * NOTE(review): the return statements are not visible in this listing.
 */
839 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
843 rt = (struct rt6_info *) dst;
845 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops negative_advice callback.
 * Visible logic: RTF_CACHE routes are treated as a special case.
 * NOTE(review): the action taken for cached routes and the return value
 * are not visible in this listing.
 */
851 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853 struct rt6_info *rt = (struct rt6_info *) dst;
856 if (rt->rt6i_flags & RTF_CACHE)
/*
 * dst_ops link_failure callback.
 * Sends an ICMPv6 destination-unreachable (address unreachable) error for
 * @skb, then looks at the route attached to the packet.  A cache entry is
 * expired immediately (dst_set_expires with 0, plus RTF_EXPIRES).  A
 * default route still attached to a fib6 node gets that node's serial
 * number set to -1.
 * NOTE(review): the NULL check on skb->dst is not visible in this listing.
 */
864 static void ip6_link_failure(struct sk_buff *skb)
868 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870 rt = (struct rt6_info *) skb->dst;
872 if (rt->rt6i_flags&RTF_CACHE) {
873 dst_set_expires(&rt->u.dst, 0);
874 rt->rt6i_flags |= RTF_EXPIRES;
875 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
876 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops update_pmtu callback.
 * Acts only when @mtu is lower than the current dst MTU and the route is a
 * /128 host route.  It marks the route RTF_MODIFIED, sets
 * RTAX_FEATURE_ALLFRAG when @mtu is below IPV6_MIN_MTU, stores the new MTU
 * metric and raises NETEVENT_PMTU_UPDATE.
 * NOTE(review): any clamping of @mtu to IPV6_MIN_MTU would be on a line
 * not visible in this listing.
 */
880 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882 struct rt6_info *rt6 = (struct rt6_info*)dst;
884 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
885 rt6->rt6i_flags |= RTF_MODIFIED;
886 if (mtu < IPV6_MIN_MTU) {
888 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890 dst->metrics[RTAX_MTU-1] = mtu;
891 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
/* Forward declaration: ipv6_get_mtu() is used before its definition. */
895 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derives an advertised MSS from a path MTU.
 * The IPv6 and TCP header sizes are subtracted from @mtu, and the result
 * is raised to the namespace's ip6_rt_min_advmss sysctl when it falls
 * below it.  Values above IPV6_MAXPLEN - sizeof(struct tcphdr) are a
 * special case, explained by the in-code comment.
 * NOTE(review): the assignment in that special case and the return
 * statement are not visible in this listing.
 */
897 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
899 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
902 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
905 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
906 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
907 * IPV6_MAXPLEN is also valid and means: "any MSS,
908 * rely only on pmtu discovery"
910 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Singly linked list (through dst->next) of dst entries created by
 * icmp6_dst_alloc(), and the spinlock that guards the list.
 */
915 static struct dst_entry *icmp6_dst_gc_list;
916 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Allocates a standalone dst entry towards @addr on @dev.
 * Takes a reference on the device's inet6_dev and allocates from
 * ip6_dst_ops.  On one visible path the neighbour is resolved with
 * ndisc_get_neigh() (presumably when @neigh was not supplied -- the
 * condition is not visible).  The entry gets one reference, hop limit 255,
 * the device MTU and the derived ADVMSS, and ip6_output as its output
 * handler.  It is pushed onto icmp6_dst_gc_list under icmp6_dst_lock and
 * the fib6 garbage collector is kicked.
 * NOTE(review): the error paths, the device assignment and the return
 * statement are not visible in this listing.
 */
918 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
919 struct neighbour *neigh,
920 struct in6_addr *addr)
923 struct inet6_dev *idev = in6_dev_get(dev);
924 struct net *net = dev->nd_net;
926 if (unlikely(idev == NULL))
929 rt = ip6_dst_alloc();
930 if (unlikely(rt == NULL)) {
939 neigh = ndisc_get_neigh(dev, addr);
942 rt->rt6i_idev = idev;
943 rt->rt6i_nexthop = neigh;
944 atomic_set(&rt->u.dst.__refcnt, 1);
945 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
946 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
947 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
948 rt->u.dst.output = ip6_output;
950 #if 0 /* there's no chance to use these for ndisc */
951 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
954 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
955 rt->rt6i_dst.plen = 128;
958 spin_lock_bh(&icmp6_dst_lock);
959 rt->u.dst.next = icmp6_dst_gc_list;
960 icmp6_dst_gc_list = &rt->u.dst;
961 spin_unlock_bh(&icmp6_dst_lock);
963 fib6_force_start_gc(net);
/*
 * Garbage-collects icmp6_dst_gc_list.
 * Walks the list under icmp6_dst_lock through a pointer-to-pointer cursor
 * and singles out entries whose reference count is zero.
 * NOTE(review): the unlink/free of unreferenced entries, the use of @more
 * and the return value are not visible in this listing.
 */
969 int icmp6_dst_gc(int *more)
971 struct dst_entry *dst, *next, **pprev;
977 spin_lock_bh(&icmp6_dst_lock);
978 pprev = &icmp6_dst_gc_list;
980 while ((dst = *pprev) != NULL) {
981 if (!atomic_read(&dst->__refcnt)) {
991 spin_unlock_bh(&icmp6_dst_lock);
/*
 * Garbage-collection hook for ip6_dst_ops.
 * It keeps an adaptive expiry (static "expire", initially 30*HZ) and the
 * time of the last run (static "last_gc").  The first test detects a call
 * arriving within ip6_rt_gc_min_interval of the last run while the entry
 * count is still within ip6_rt_max_size.  Otherwise fib6_run_gc() runs
 * with the current expiry.  The expiry is reset to half of
 * ip6_rt_gc_timeout once the entry count is below gc_thresh, and is then
 * decayed by expire >> ip6_rt_gc_elasticity.
 * Returns non-zero while the entry count still exceeds ip6_rt_max_size.
 * NOTE(review): all tunables are read from init_net regardless of @ops;
 * the early-exit target and the update of last_gc are not visible here.
 */
996 static int ip6_dst_gc(struct dst_ops *ops)
998 static unsigned expire = 30*HZ;
999 static unsigned long last_gc;
1000 unsigned long now = jiffies;
1002 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1003 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1007 fib6_run_gc(expire, &init_net);
1009 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1010 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1013 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1014 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1017 /* Clean host part of a prefix. Not necessary in radix tree,
1018 but results in cleaner routing tables.
1020 Remove it only once everything else works! */
/*
 * Returns the IPv6 MTU to use for @dev.
 * The default is IPV6_MIN_MTU; when the device has an inet6_dev, its
 * configured cnf.mtu6 is used instead.
 * NOTE(review): the NULL check, the in6_dev_put() and the return are not
 * visible in this listing.
 */
1023 static int ipv6_get_mtu(struct net_device *dev)
1025 int mtu = IPV6_MIN_MTU;
1026 struct inet6_dev *idev;
1028 idev = in6_dev_get(dev);
1030 mtu = idev->cnf.mtu6;
/*
 * Returns the hop limit to use for @dev.
 * The default is the global ipv6_devconf.hop_limit; when the device has an
 * inet6_dev, its cnf.hop_limit is used instead.
 * NOTE(review): the NULL check, the in6_dev_put() and the return are not
 * visible in this listing.
 */
1036 int ipv6_get_hoplimit(struct net_device *dev)
1038 int hoplimit = ipv6_devconf.hop_limit;
1039 struct inet6_dev *idev;
1041 idev = in6_dev_get(dev);
1043 hoplimit = idev->cnf.hop_limit;
/*
 * Creates a route from @cfg and inserts it into its table.
 * Visible steps, in order:
 *  - prefix lengths above 128 are checked for, as is any source prefix
 *    when CONFIG_IPV6_SUBTREES is off;
 *  - cfg->fc_ifindex, if set, is resolved to a device and its inet6_dev;
 *  - a zero metric becomes IP6_RT_PRIO_USER;
 *  - the table comes from fib6_new_table() and the route from
 *    ip6_dst_alloc();
 *  - obsolete is set to -1, the expiry to jiffies plus the converted
 *    fc_expires, and RTPROT_UNSPEC becomes RTPROT_BOOT;
 *  - input is ip6_mc_input for multicast destinations and ip6_forward
 *    otherwise; output is ip6_output;
 *  - destination (and, with subtrees, source) keys are masked to their
 *    prefix lengths; /128 destinations get DST_HOST;
 *  - RTF_REJECT requests, and routes through a loopback device to a
 *    non-loopback destination, become reject routes on the namespace
 *    loopback device with -ENETUNREACH;
 *  - for RTF_GATEWAY, a gateway that is not link-local unicast must be
 *    unicast and is resolved with rt6_lookup(); the device and inet6_dev
 *    are taken from that route;
 *  - a NULL or loopback device is checked for at this point;
 *  - RTF_GATEWAY / RTF_NONEXTHOP routes get a neighbour entry from
 *    __neigh_lookup_errno();
 *  - netlink metrics are copied; types above RTAX_MAX are checked for;
 *  - a zero hop-limit metric becomes -1, and missing MTU / ADVMSS metrics
 *    are derived from the device;
 *  - the route is inserted with __ip6_ins_rt().
 * The visible tail calls dst_free() on the route.
 * NOTE(review): error codes, goto labels and the reference drops on
 * failure are mostly not visible in this listing.
 */
1053 int ip6_route_add(struct fib6_config *cfg)
1056 struct net *net = cfg->fc_nlinfo.nl_net;
1057 struct rt6_info *rt = NULL;
1058 struct net_device *dev = NULL;
1059 struct inet6_dev *idev = NULL;
1060 struct fib6_table *table;
1063 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1065 #ifndef CONFIG_IPV6_SUBTREES
1066 if (cfg->fc_src_len)
1069 if (cfg->fc_ifindex) {
1071 dev = dev_get_by_index(net, cfg->fc_ifindex);
1074 idev = in6_dev_get(dev);
1079 if (cfg->fc_metric == 0)
1080 cfg->fc_metric = IP6_RT_PRIO_USER;
1082 table = fib6_new_table(net, cfg->fc_table);
1083 if (table == NULL) {
1088 rt = ip6_dst_alloc();
1095 rt->u.dst.obsolete = -1;
1096 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1098 if (cfg->fc_protocol == RTPROT_UNSPEC)
1099 cfg->fc_protocol = RTPROT_BOOT;
1100 rt->rt6i_protocol = cfg->fc_protocol;
1102 addr_type = ipv6_addr_type(&cfg->fc_dst);
1104 if (addr_type & IPV6_ADDR_MULTICAST)
1105 rt->u.dst.input = ip6_mc_input;
1107 rt->u.dst.input = ip6_forward;
1109 rt->u.dst.output = ip6_output;
1111 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112 rt->rt6i_dst.plen = cfg->fc_dst_len;
1113 if (rt->rt6i_dst.plen == 128)
1114 rt->u.dst.flags = DST_HOST;
1116 #ifdef CONFIG_IPV6_SUBTREES
1117 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118 rt->rt6i_src.plen = cfg->fc_src_len;
1121 rt->rt6i_metric = cfg->fc_metric;
1123 /* We cannot add true routes via loopback here,
1124 they would result in kernel looping; promote them to reject routes
1126 if ((cfg->fc_flags & RTF_REJECT) ||
1127 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128 /* hold loopback dev/idev if we haven't done so. */
1129 if (dev != net->loopback_dev) {
1134 dev = net->loopback_dev;
1136 idev = in6_dev_get(dev);
1142 rt->u.dst.output = ip6_pkt_discard_out;
1143 rt->u.dst.input = ip6_pkt_discard;
1144 rt->u.dst.error = -ENETUNREACH;
1145 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1149 if (cfg->fc_flags & RTF_GATEWAY) {
1150 struct in6_addr *gw_addr;
1153 gw_addr = &cfg->fc_gateway;
1154 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1155 gwa_type = ipv6_addr_type(gw_addr);
1157 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158 struct rt6_info *grt;
1160 /* IPv6 strictly inhibits using not link-local
1161 addresses as nexthop address.
1162 Otherwise, router will not able to send redirects.
1163 It is very good, but in some (rare!) circumstances
1164 (SIT, PtP, NBMA NOARP links) it is handy to allow
1165 some exceptions. --ANK
1168 if (!(gwa_type&IPV6_ADDR_UNICAST))
1171 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1173 err = -EHOSTUNREACH;
1177 if (dev != grt->rt6i_dev) {
1178 dst_release(&grt->u.dst);
1182 dev = grt->rt6i_dev;
1183 idev = grt->rt6i_idev;
1185 in6_dev_hold(grt->rt6i_idev);
1187 if (!(grt->rt6i_flags&RTF_GATEWAY))
1189 dst_release(&grt->u.dst);
1195 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1203 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1204 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205 if (IS_ERR(rt->rt6i_nexthop)) {
1206 err = PTR_ERR(rt->rt6i_nexthop);
1207 rt->rt6i_nexthop = NULL;
1212 rt->rt6i_flags = cfg->fc_flags;
1219 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1220 int type = nla_type(nla);
1223 if (type > RTAX_MAX) {
1228 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1233 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235 if (!rt->u.dst.metrics[RTAX_MTU-1])
1236 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1238 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1239 rt->u.dst.dev = dev;
1240 rt->rt6i_idev = idev;
1241 rt->rt6i_table = table;
1243 cfg->fc_nlinfo.nl_net = dev->nd_net;
1245 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1253 dst_free(&rt->u.dst);
/*
 * Removes @rt from its table.
 * The namespace's null entry is checked for first.  Otherwise fib6_del()
 * runs under the table write lock, and the reference on @rt is dropped
 * with dst_release() before the lock is released.
 * NOTE(review): the value returned for the null entry and the final
 * return are not visible in this listing.
 */
1257 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1260 struct fib6_table *table;
1261 struct net *net = rt->rt6i_dev->nd_net;
1263 if (rt == net->ipv6.ip6_null_entry)
1266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
1269 err = fib6_del(rt, info);
1270 dst_release(&rt->u.dst);
1272 write_unlock_bh(&table->tb6_lock);
/*
 * Convenience wrapper around __ip6_del_rt(): builds an nl_info whose
 * network namespace is that of the route's device and returns the result
 * of the deletion.
 */
1277 int ip6_del_rt(struct rt6_info *rt)
1279 struct nl_info info = {
1280 .nl_net = rt->rt6i_dev->nd_net,
1282 return __ip6_del_rt(rt, &info);
/*
 * Deletes the first route matching @cfg.
 * The table is found with fib6_get_table() and the node for the exact
 * destination/source prefixes with fib6_locate(), under the table read
 * lock.  The node's routes are filtered by interface index (when given),
 * by gateway (when RTF_GATEWAY is requested) and by metric (when
 * non-zero).  The first route left is held, the lock dropped, and the
 * route removed with __ip6_del_rt().
 * NOTE(review): the "continue" statements, the missing-table and
 * missing-node handling and the not-found return are not visible here.
 */
1285 static int ip6_route_del(struct fib6_config *cfg)
1287 struct fib6_table *table;
1288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1296 read_lock_bh(&table->tb6_lock);
1298 fn = fib6_locate(&table->tb6_root,
1299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
1303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1304 if (cfg->fc_ifindex &&
1305 (rt->rt6i_dev == NULL ||
1306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1313 dst_hold(&rt->u.dst);
1314 read_unlock_bh(&table->tb6_lock);
1316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1319 read_unlock_bh(&table->tb6_lock);
/*
 * Flow key extended with the gateway that sent a redirect.
 * NOTE(review): pointers to this struct are cast to and from
 * struct flowi * elsewhere in this file, so the first member is presumably
 * a struct flowi; that member line is not visible in this listing.
 */
1327 struct ip6rd_flowi {
1329 struct in6_addr gateway;
/*
 * Per-table callback that validates the source of a redirect.
 * @fl is read as a struct ip6rd_flowi carrying the redirecting gateway.
 * Under the table read lock the routes of the node matching the flow are
 * scanned with four tests: the route is expired, it lacks RTF_GATEWAY, its
 * device index differs from fl->oif, or its gateway differs from the
 * redirecting gateway.  When nothing is found, rt falls back to the
 * namespace's null entry and BACKTRACK is applied.  The result is held
 * before the lock is released.
 * NOTE(review): the "continue"/"break" statements and the return are not
 * visible in this listing.
 */
1332 static struct rt6_info *__ip6_route_redirect(struct net *net,
1333 struct fib6_table *table,
1337 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1338 struct rt6_info *rt;
1339 struct fib6_node *fn;
1342 * Get the "current" route for this destination and
1343 * check if the redirect has come from approriate router.
1345 * RFC 2461 specifies that redirects should only be
1346 * accepted if they come from the nexthop to the target.
1347 * Due to the way the routes are chosen, this notion
1348 * is a bit fuzzy and one might need to check all possible
1352 read_lock_bh(&table->tb6_lock);
1353 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1355 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1357 * Current route is on-link; redirect is always invalid.
1359 * Seems, previous statement is not true. It could
1360 * be node, which looks for us as on-link (f.e. proxy ndisc)
1361 * But then router serving it might decide, that we should
1362 * know truth 8)8) --ANK (980726).
1364 if (rt6_check_expired(rt))
1366 if (!(rt->rt6i_flags & RTF_GATEWAY))
1368 if (fl->oif != rt->rt6i_dev->ifindex)
1370 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1376 rt = net->ipv6.ip6_null_entry;
1377 BACKTRACK(net, &fl->fl6_src);
1379 dst_hold(&rt->u.dst);
1381 read_unlock_bh(&table->tb6_lock);
/*
 * Looks up the route that a redirect for @dest, sent by @gateway and
 * received on @dev, would replace.  It builds an ip6rd_flowi with the
 * device index as oif and a copy of the gateway.  RT6_LOOKUP_F_HAS_SADDR
 * is always set, and RT6_LOOKUP_F_IFACE is added for multicast or
 * link-local destinations.  The lookup runs through fib6_rule_lookup()
 * with __ip6_route_redirect as the per-table callback.
 * NOTE(review): the address fields of the flow initialiser are not visible
 * in this listing.
 */
1386 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1387 struct in6_addr *src,
1388 struct in6_addr *gateway,
1389 struct net_device *dev)
1391 int flags = RT6_LOOKUP_F_HAS_SADDR;
1392 struct net *net = dev->nd_net;
1393 struct ip6rd_flowi rdfl = {
1395 .oif = dev->ifindex,
1403 .gateway = *gateway,
1406 if (rt6_need_strict(dest))
1407 flags |= RT6_LOOKUP_F_IFACE;
1409 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1410 flags, __ip6_route_redirect);
/*
 * Applies an accepted ICMPv6 redirect for @dest.
 * Visible steps:
 *  - ip6_route_redirect() finds the current route; a null entry means the
 *    sender is not a valid nexthop, which is logged (rate-limited);
 *  - the new nexthop @neigh is updated to NUD_STALE with @lladdr, using
 *    override flags plus the is-router flags when not @on_link;
 *  - the old route is confirmed (dst_confirm), since redirects are sent in
 *    response to data packets;
 *  - a redirect whose neighbour is already the route's neighbour is a
 *    duplicate;
 *  - otherwise a copy of the route becomes a /128 RTF_DYNAMIC | RTF_CACHE
 *    entry via the new neighbour.  RTF_GATEWAY is set and then cleared on
 *    a separate line (presumably only for on-link redirects -- the
 *    condition is not visible).  MTU and ADVMSS are reset from the
 *    neighbour's device;
 *  - the copy is inserted with ip6_ins_rt(), NETEVENT_REDIRECT is raised
 *    with the old and new dst, and an old RTF_CACHE route gets special
 *    handling (not visible);
 *  - the reference on the looked-up route is dropped.
 * NOTE(review): the goto/return statements and the old cache route
 * handling are not visible in this listing.
 */
1413 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1414 struct in6_addr *saddr,
1415 struct neighbour *neigh, u8 *lladdr, int on_link)
1417 struct rt6_info *rt, *nrt = NULL;
1418 struct netevent_redirect netevent;
1419 struct net *net = neigh->dev->nd_net;
1421 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1423 if (rt == net->ipv6.ip6_null_entry) {
1424 if (net_ratelimit())
1425 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1426 "for redirect target\n");
1431 * We have finally decided to accept it.
1434 neigh_update(neigh, lladdr, NUD_STALE,
1435 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1436 NEIGH_UPDATE_F_OVERRIDE|
1437 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1438 NEIGH_UPDATE_F_ISROUTER))
1442 * Redirect received -> path was valid.
1443 * Look, redirects are sent only in response to data packets,
1444 * so that this nexthop apparently is reachable. --ANK
1446 dst_confirm(&rt->u.dst);
1448 /* Duplicate redirect: silently ignore. */
1449 if (neigh == rt->u.dst.neighbour)
1452 nrt = ip6_rt_copy(rt);
1456 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1458 nrt->rt6i_flags &= ~RTF_GATEWAY;
1460 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1461 nrt->rt6i_dst.plen = 128;
1462 nrt->u.dst.flags |= DST_HOST;
1464 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1465 nrt->rt6i_nexthop = neigh_clone(neigh);
1466 /* Reset pmtu, it may be better */
1467 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1468 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1469 dst_mtu(&nrt->u.dst));
1471 if (ip6_ins_rt(nrt))
1474 netevent.old = &rt->u.dst;
1475 netevent.new = &nrt->u.dst;
1476 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1478 if (rt->rt6i_flags&RTF_CACHE) {
1484 dst_release(&rt->u.dst);
1489 * Handle ICMP "packet too big" messages
1490 * i.e. Path MTU discovery
/*
 * Applies a path MTU (@pmtu) learned for @daddr (from @saddr) on @dev.
 * Visible steps:
 *  - the route is looked up with rt6_lookup();
 *  - a @pmtu that is not smaller than the route's current MTU is checked
 *    for first;
 *  - a @pmtu below IPV6_MIN_MTU is raised to IPV6_MIN_MTU (the in-code
 *    comment cites RFC 2460);
 *  - the route is confirmed (dst_confirm);
 *  - an RTF_CACHE route is updated in place: MTU metric, ALLFRAG feature
 *    on one visible line (condition not visible), expiry of
 *    ip6_rt_mtu_expires, and RTF_MODIFIED | RTF_EXPIRES;
 *  - any other route is copied: rt6_alloc_cow() for routes with no nexthop
 *    that are not RTF_NONEXTHOP, rt6_alloc_clone() otherwise.  The copy
 *    gets the same MTU/expiry treatment plus RTF_DYNAMIC | RTF_EXPIRES;
 *  - the reference on the looked-up route is dropped.
 * NOTE(review): the NULL checks, the insertion of the copy and the goto
 * labels are not visible in this listing.
 */
1493 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1494 struct net_device *dev, u32 pmtu)
1496 struct rt6_info *rt, *nrt;
1497 struct net *net = dev->nd_net;
1500 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1504 if (pmtu >= dst_mtu(&rt->u.dst))
1507 if (pmtu < IPV6_MIN_MTU) {
1509 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1510 * MTU (1280) and a fragment header should always be included
1511 * after a node receiving Too Big message reporting PMTU is
1512 * less than the IPv6 Minimum Link MTU.
1514 pmtu = IPV6_MIN_MTU;
1518 /* New mtu received -> path was valid.
1519 They are sent only in response to data packets,
1520 so that this nexthop apparently is reachable. --ANK
1522 dst_confirm(&rt->u.dst);
1524 /* Host route. If it is static, it would be better
1525 not to override it, but add new one, so that
1526 when cache entry will expire old pmtu
1527 would return automatically.
1529 if (rt->rt6i_flags & RTF_CACHE) {
1530 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1532 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1533 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1534 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1539 Two cases are possible:
1540 1. It is connected route. Action: COW
1541 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1543 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1544 nrt = rt6_alloc_cow(rt, daddr, saddr);
1546 nrt = rt6_alloc_clone(rt, daddr);
1549 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1551 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1553 /* According to RFC 1981, detecting PMTU increase shouldn't be
1554 * happened within 5 mins, the recommended timer is 10 mins.
1555 * Here this route expiration time is set to ip6_rt_mtu_expires
1556 * which is 10 mins. After 10 mins the decreased pmtu is expired
1557 * and detecting PMTU increase will be automatically happened.
1559 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1560 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1565 dst_release(&rt->u.dst);
1569 * Misc support functions
1572 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1574 struct rt6_info *rt = ip6_dst_alloc();
1577 rt->u.dst.input = ort->u.dst.input;
1578 rt->u.dst.output = ort->u.dst.output;
1580 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1581 rt->u.dst.error = ort->u.dst.error;
1582 rt->u.dst.dev = ort->u.dst.dev;
1584 dev_hold(rt->u.dst.dev);
1585 rt->rt6i_idev = ort->rt6i_idev;
1587 in6_dev_hold(rt->rt6i_idev);
1588 rt->u.dst.lastuse = jiffies;
1589 rt->rt6i_expires = 0;
1591 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1592 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1593 rt->rt6i_metric = 0;
1595 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1596 #ifdef CONFIG_IPV6_SUBTREES
1597 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1599 rt->rt6i_table = ort->rt6i_table;
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find the route-information route for @prefix/@prefixlen in
 * RT6_TABLE_INFO that goes out through @ifindex via gateway @gwaddr and
 * carries both RTF_ROUTEINFO and RTF_GATEWAY.
 *
 * Returns the route with a reference held, or NULL when the table, the
 * node or a matching route does not exist.
 *
 * The walk only reads the tree, so the table lock is taken shared.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->u.dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Add a route-information route for @prefix/@prefixlen via @gwaddr on
 * @ifindex with router preference @pref, then look it up again.
 *
 * The result of ip6_route_add() is not inspected; the caller gets whatever
 * rt6_get_route_info() finds afterwards (possibly NULL).
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
1667 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1669 struct rt6_info *rt;
1670 struct fib6_table *table;
1672 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1676 write_lock_bh(&table->tb6_lock);
1677 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1678 if (dev == rt->rt6i_dev &&
1679 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1680 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1684 dst_hold(&rt->u.dst);
1685 write_unlock_bh(&table->tb6_lock);
1689 EXPORT_SYMBOL(rt6_get_dflt_router);
1691 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1692 struct net_device *dev,
1695 struct fib6_config cfg = {
1696 .fc_table = RT6_TABLE_DFLT,
1697 .fc_metric = IP6_RT_PRIO_USER,
1698 .fc_ifindex = dev->ifindex,
1699 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1700 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1702 .fc_nlinfo.nlh = NULL,
1703 .fc_nlinfo.nl_net = dev->nd_net,
1706 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1708 ip6_route_add(&cfg);
1710 return rt6_get_dflt_router(gwaddr, dev);
1713 void rt6_purge_dflt_routers(struct net *net)
1715 struct rt6_info *rt;
1716 struct fib6_table *table;
1718 /* NOTE: Keep consistent with rt6_get_dflt_router */
1719 table = fib6_get_table(net, RT6_TABLE_DFLT);
1724 read_lock_bh(&table->tb6_lock);
1725 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1726 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1727 dst_hold(&rt->u.dst);
1728 read_unlock_bh(&table->tb6_lock);
1733 read_unlock_bh(&table->tb6_lock);
1736 static void rtmsg_to_fib6_config(struct net *net,
1737 struct in6_rtmsg *rtmsg,
1738 struct fib6_config *cfg)
1740 memset(cfg, 0, sizeof(*cfg));
1742 cfg->fc_table = RT6_TABLE_MAIN;
1743 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1744 cfg->fc_metric = rtmsg->rtmsg_metric;
1745 cfg->fc_expires = rtmsg->rtmsg_info;
1746 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1747 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1748 cfg->fc_flags = rtmsg->rtmsg_flags;
1750 cfg->fc_nlinfo.nl_net = net;
1752 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1753 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1754 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1757 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1759 struct fib6_config cfg;
1760 struct in6_rtmsg rtmsg;
1764 case SIOCADDRT: /* Add a route */
1765 case SIOCDELRT: /* Delete a route */
1766 if (!capable(CAP_NET_ADMIN))
1768 err = copy_from_user(&rtmsg, arg,
1769 sizeof(struct in6_rtmsg));
1773 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1778 err = ip6_route_add(&cfg);
1781 err = ip6_route_del(&cfg);
1795 * Drop the packet on the floor
1798 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1801 switch (ipstats_mib_noroutes) {
1802 case IPSTATS_MIB_INNOROUTES:
1803 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1804 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1805 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1809 case IPSTATS_MIB_OUTNOROUTES:
1810 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1813 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1818 static int ip6_pkt_discard(struct sk_buff *skb)
1820 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1823 static int ip6_pkt_discard_out(struct sk_buff *skb)
1825 skb->dev = skb->dst->dev;
1826 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Input handler: drop @skb with ICMPV6_ADM_PROHIBITED (INNOROUTES). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	const int code = ICMPV6_ADM_PROHIBITED;

	return ip6_pkt_drop(skb, code, IPSTATS_MIB_INNOROUTES);
}

/*
 * Output handler: drop @skb with ICMPV6_ADM_PROHIBITED (OUTNOROUTES).
 * skb->dev is pointed at the dst's device before the drop.
 */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	const int code = ICMPV6_ADM_PROHIBITED;

	skb->dev = skb->dst->dev;
	return ip6_pkt_drop(skb, code, IPSTATS_MIB_OUTNOROUTES);
}

#endif
1845 * Allocate a dst for local (unicast / anycast) address.
1848 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1849 const struct in6_addr *addr,
1852 struct net *net = idev->dev->nd_net;
1853 struct rt6_info *rt = ip6_dst_alloc();
1856 return ERR_PTR(-ENOMEM);
1858 dev_hold(net->loopback_dev);
1861 rt->u.dst.flags = DST_HOST;
1862 rt->u.dst.input = ip6_input;
1863 rt->u.dst.output = ip6_output;
1864 rt->rt6i_dev = net->loopback_dev;
1865 rt->rt6i_idev = idev;
1866 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1867 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1868 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1869 rt->u.dst.obsolete = -1;
1871 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1873 rt->rt6i_flags |= RTF_ANYCAST;
1875 rt->rt6i_flags |= RTF_LOCAL;
1876 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1877 if (rt->rt6i_nexthop == NULL) {
1878 dst_free(&rt->u.dst);
1879 return ERR_PTR(-ENOMEM);
1882 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1883 rt->rt6i_dst.plen = 128;
1884 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1886 atomic_set(&rt->u.dst.__refcnt, 1);
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace whose null entry is spared */
};
1896 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1898 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1899 struct net *net = ((struct arg_dev_net *)arg)->net;
1901 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1902 rt != net->ipv6.ip6_null_entry) {
1903 RT6_TRACE("deleted by ifdown %p\n", rt);
1909 void rt6_ifdown(struct net *net, struct net_device *dev)
1911 struct arg_dev_net adn = {
1916 fib6_clean_all(net, fib6_ifdown, 0, &adn);
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1925 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1927 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1928 struct inet6_dev *idev;
1929 struct net *net = arg->dev->nd_net;
1931 /* In IPv6 pmtu discovery is not optional,
1932 so that RTAX_MTU lock cannot disable it.
1933 We still use this lock to block changes
1934 caused by addrconf/ndisc.
1937 idev = __in6_dev_get(arg->dev);
1941 /* For administrative MTU increase, there is no way to discover
1942 IPv6 PMTU increase, so PMTU increase should be updated here.
1943 Since RFC 1981 doesn't include administrative MTU increase
1944 update PMTU increase is a MUST. (i.e. jumbo frame)
1947 If new MTU is less than route PMTU, this new MTU will be the
1948 lowest MTU in the path, update the route PMTU to reflect PMTU
1949 decreases; if new MTU is greater than route PMTU, and the
1950 old MTU is the lowest MTU in the path, update the route PMTU
1951 to reflect the increase. In this case if the other nodes' MTU
1952 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1955 if (rt->rt6i_dev == arg->dev &&
1956 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1957 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1958 (dst_mtu(&rt->u.dst) < arg->mtu &&
1959 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1960 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1961 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1966 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1968 struct rt6_mtu_change_arg arg = {
1973 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1976 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1977 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1978 [RTA_OIF] = { .type = NLA_U32 },
1979 [RTA_IIF] = { .type = NLA_U32 },
1980 [RTA_PRIORITY] = { .type = NLA_U32 },
1981 [RTA_METRICS] = { .type = NLA_NESTED },
1984 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1985 struct fib6_config *cfg)
1988 struct nlattr *tb[RTA_MAX+1];
1991 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1996 rtm = nlmsg_data(nlh);
1997 memset(cfg, 0, sizeof(*cfg));
1999 cfg->fc_table = rtm->rtm_table;
2000 cfg->fc_dst_len = rtm->rtm_dst_len;
2001 cfg->fc_src_len = rtm->rtm_src_len;
2002 cfg->fc_flags = RTF_UP;
2003 cfg->fc_protocol = rtm->rtm_protocol;
2005 if (rtm->rtm_type == RTN_UNREACHABLE)
2006 cfg->fc_flags |= RTF_REJECT;
2008 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2009 cfg->fc_nlinfo.nlh = nlh;
2010 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
2012 if (tb[RTA_GATEWAY]) {
2013 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2014 cfg->fc_flags |= RTF_GATEWAY;
2018 int plen = (rtm->rtm_dst_len + 7) >> 3;
2020 if (nla_len(tb[RTA_DST]) < plen)
2023 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2027 int plen = (rtm->rtm_src_len + 7) >> 3;
2029 if (nla_len(tb[RTA_SRC]) < plen)
2032 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2036 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2038 if (tb[RTA_PRIORITY])
2039 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2041 if (tb[RTA_METRICS]) {
2042 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2043 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2047 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2054 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2056 struct fib6_config cfg;
2059 err = rtm_to_fib6_config(skb, nlh, &cfg);
2063 return ip6_route_del(&cfg);
2066 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2068 struct fib6_config cfg;
2071 err = rtm_to_fib6_config(skb, nlh, &cfg);
2075 return ip6_route_add(&cfg);
2078 static inline size_t rt6_nlmsg_size(void)
2080 return NLMSG_ALIGN(sizeof(struct rtmsg))
2081 + nla_total_size(16) /* RTA_SRC */
2082 + nla_total_size(16) /* RTA_DST */
2083 + nla_total_size(16) /* RTA_GATEWAY */
2084 + nla_total_size(16) /* RTA_PREFSRC */
2085 + nla_total_size(4) /* RTA_TABLE */
2086 + nla_total_size(4) /* RTA_IIF */
2087 + nla_total_size(4) /* RTA_OIF */
2088 + nla_total_size(4) /* RTA_PRIORITY */
2089 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2090 + nla_total_size(sizeof(struct rta_cacheinfo));
2093 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2094 struct in6_addr *dst, struct in6_addr *src,
2095 int iif, int type, u32 pid, u32 seq,
2096 int prefix, unsigned int flags)
2099 struct nlmsghdr *nlh;
2103 if (prefix) { /* user wants prefix routes only */
2104 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2105 /* success since this is not a prefix route */
2110 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2114 rtm = nlmsg_data(nlh);
2115 rtm->rtm_family = AF_INET6;
2116 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2117 rtm->rtm_src_len = rt->rt6i_src.plen;
2120 table = rt->rt6i_table->tb6_id;
2122 table = RT6_TABLE_UNSPEC;
2123 rtm->rtm_table = table;
2124 NLA_PUT_U32(skb, RTA_TABLE, table);
2125 if (rt->rt6i_flags&RTF_REJECT)
2126 rtm->rtm_type = RTN_UNREACHABLE;
2127 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2128 rtm->rtm_type = RTN_LOCAL;
2130 rtm->rtm_type = RTN_UNICAST;
2132 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2133 rtm->rtm_protocol = rt->rt6i_protocol;
2134 if (rt->rt6i_flags&RTF_DYNAMIC)
2135 rtm->rtm_protocol = RTPROT_REDIRECT;
2136 else if (rt->rt6i_flags & RTF_ADDRCONF)
2137 rtm->rtm_protocol = RTPROT_KERNEL;
2138 else if (rt->rt6i_flags&RTF_DEFAULT)
2139 rtm->rtm_protocol = RTPROT_RA;
2141 if (rt->rt6i_flags&RTF_CACHE)
2142 rtm->rtm_flags |= RTM_F_CLONED;
2145 NLA_PUT(skb, RTA_DST, 16, dst);
2146 rtm->rtm_dst_len = 128;
2147 } else if (rtm->rtm_dst_len)
2148 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2149 #ifdef CONFIG_IPV6_SUBTREES
2151 NLA_PUT(skb, RTA_SRC, 16, src);
2152 rtm->rtm_src_len = 128;
2153 } else if (rtm->rtm_src_len)
2154 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2157 NLA_PUT_U32(skb, RTA_IIF, iif);
2159 struct in6_addr saddr_buf;
2160 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2161 dst, &saddr_buf) == 0)
2162 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2165 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2166 goto nla_put_failure;
2168 if (rt->u.dst.neighbour)
2169 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2172 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2174 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2176 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2177 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2178 expires, rt->u.dst.error) < 0)
2179 goto nla_put_failure;
2181 return nlmsg_end(skb, nlh);
2184 nlmsg_cancel(skb, nlh);
2188 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2190 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2193 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2194 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2195 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2199 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2200 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2201 prefix, NLM_F_MULTI);
2204 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2206 struct net *net = in_skb->sk->sk_net;
2207 struct nlattr *tb[RTA_MAX+1];
2208 struct rt6_info *rt;
2209 struct sk_buff *skb;
2214 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2219 memset(&fl, 0, sizeof(fl));
2222 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2225 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2229 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2232 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2236 iif = nla_get_u32(tb[RTA_IIF]);
2239 fl.oif = nla_get_u32(tb[RTA_OIF]);
2242 struct net_device *dev;
2243 dev = __dev_get_by_index(net, iif);
2250 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2256 /* Reserve room for dummy headers, this skb can pass
2257 through good chunk of routing engine.
2259 skb_reset_mac_header(skb);
2260 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2262 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2263 skb->dst = &rt->u.dst;
2265 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2266 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2267 nlh->nlmsg_seq, 0, 0);
2273 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2278 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2280 struct sk_buff *skb;
2281 struct net *net = info->nl_net;
2286 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2288 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2292 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2293 event, info->pid, seq, 0, 0);
2295 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2296 WARN_ON(err == -EMSGSIZE);
2300 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2301 info->nlh, gfp_any());
2304 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2307 static int ip6_route_dev_notify(struct notifier_block *this,
2308 unsigned long event, void *data)
2310 struct net_device *dev = (struct net_device *)data;
2311 struct net *net = dev->nd_net;
2313 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2314 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2315 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2316 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2317 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2318 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2319 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2320 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2331 #ifdef CONFIG_PROC_FS
2333 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2344 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2346 struct seq_file *m = p_arg;
2348 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2351 #ifdef CONFIG_IPV6_SUBTREES
2352 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2355 seq_puts(m, "00000000000000000000000000000000 00 ");
2358 if (rt->rt6i_nexthop) {
2359 seq_printf(m, NIP6_SEQFMT,
2360 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2362 seq_puts(m, "00000000000000000000000000000000");
2364 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2365 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2366 rt->u.dst.__use, rt->rt6i_flags,
2367 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2371 static int ipv6_route_show(struct seq_file *m, void *v)
2373 struct net *net = (struct net *)m->private;
2374 fib6_clean_all(net, rt6_info_route, 0, m);
2378 static int ipv6_route_open(struct inode *inode, struct file *file)
2380 struct net *net = get_proc_net(inode);
2383 return single_open(file, ipv6_route_show, net);
2386 static int ipv6_route_release(struct inode *inode, struct file *file)
2388 struct seq_file *seq = file->private_data;
2389 struct net *net = seq->private;
2391 return single_release(inode, file);
2394 static const struct file_operations ipv6_route_proc_fops = {
2395 .owner = THIS_MODULE,
2396 .open = ipv6_route_open,
2398 .llseek = seq_lseek,
2399 .release = ipv6_route_release,
2402 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2404 struct net *net = (struct net *)seq->private;
2405 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2406 net->ipv6.rt6_stats->fib_nodes,
2407 net->ipv6.rt6_stats->fib_route_nodes,
2408 net->ipv6.rt6_stats->fib_rt_alloc,
2409 net->ipv6.rt6_stats->fib_rt_entries,
2410 net->ipv6.rt6_stats->fib_rt_cache,
2411 atomic_read(&ip6_dst_ops.entries),
2412 net->ipv6.rt6_stats->fib_discarded_routes);
2417 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2419 struct net *net = get_proc_net(inode);
2420 return single_open(file, rt6_stats_seq_show, net);
2423 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2425 struct seq_file *seq = file->private_data;
2426 struct net *net = (struct net *)seq->private;
2428 return single_release(inode, file);
2431 static const struct file_operations rt6_stats_seq_fops = {
2432 .owner = THIS_MODULE,
2433 .open = rt6_stats_seq_open,
2435 .llseek = seq_lseek,
2436 .release = rt6_stats_seq_release,
2438 #endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SYSCTL

/*
 * Handler for the write-only "flush" sysctl: store the written value and
 * run the route garbage collector for the current namespace.
 *
 * Returns -EINVAL on reads, the proc_dointvec() error when the write
 * cannot be parsed (no flush is run in that case), and 0 otherwise.
 *
 * NOTE(review): the delay handed to fib6_run_gc() is the value that was
 * stored before this write, not the one just written - confirm whether
 * that is intended.
 */
static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = current->nsproxy->net_ns;
	int delay = net->ipv6.sysctl.flush_delay;
	int err;

	if (!write)
		return -EINVAL;

	err = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
	if (err)
		return err;

	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
	return 0;
}

/*
 * Template for the per-namespace net.ipv6.route sysctl table.
 *
 * The .data pointers reference init_net here and are redirected to the
 * owning namespace by ipv6_route_sysctl_init(); the entry order must stay
 * in sync with the indices used there.
 *
 * Only time-valued entries use the jiffies handlers.  gc_elasticity and
 * min_adv_mss are plain integers and must not be scaled by HZ.
 */
ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	&ipv6_sysctl_rtcache_flush
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_ms_jiffies,
		.strategy	=	&sysctl_ms_jiffies,
	},
	{ .ctl_name = 0 }
};

/*
 * Duplicate the sysctl template for @net and point every per-namespace
 * entry at that namespace's own variables.
 *
 * Returns the new table, or NULL when the allocation fails.
 */
struct ctl_table *ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* table[1].data will be handled when we have
		   routes per namespace */
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms is a second view of gc_min_interval. */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
#endif
2571 static int ip6_route_net_init(struct net *net)
2576 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2577 sizeof(*net->ipv6.ip6_null_entry),
2579 if (!net->ipv6.ip6_null_entry)
2581 net->ipv6.ip6_null_entry->u.dst.path =
2582 (struct dst_entry *)net->ipv6.ip6_null_entry;
2584 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2585 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2586 sizeof(*net->ipv6.ip6_prohibit_entry),
2588 if (!net->ipv6.ip6_prohibit_entry) {
2589 kfree(net->ipv6.ip6_null_entry);
2592 net->ipv6.ip6_prohibit_entry->u.dst.path =
2593 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2595 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2596 sizeof(*net->ipv6.ip6_blk_hole_entry),
2598 if (!net->ipv6.ip6_blk_hole_entry) {
2599 kfree(net->ipv6.ip6_null_entry);
2600 kfree(net->ipv6.ip6_prohibit_entry);
2603 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2604 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2607 #ifdef CONFIG_PROC_FS
2608 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2609 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2616 static void ip6_route_net_exit(struct net *net)
2618 #ifdef CONFIG_PROC_FS
2619 proc_net_remove(net, "ipv6_route");
2620 proc_net_remove(net, "rt6_stats");
2622 kfree(net->ipv6.ip6_null_entry);
2623 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(net->ipv6.ip6_prohibit_entry);
2625 kfree(net->ipv6.ip6_blk_hole_entry);
2629 static struct pernet_operations ip6_route_net_ops = {
2630 .init = ip6_route_net_init,
2631 .exit = ip6_route_net_exit,
2634 static struct notifier_block ip6_route_dev_notifier = {
2635 .notifier_call = ip6_route_dev_notify,
2639 int __init ip6_route_init(void)
2643 ip6_dst_ops.kmem_cachep =
2644 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2645 SLAB_HWCACHE_ALIGN, NULL);
2646 if (!ip6_dst_ops.kmem_cachep)
2649 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2651 ret = register_pernet_subsys(&ip6_route_net_ops);
2653 goto out_kmem_cache;
2655 /* Registering of the loopback is done before this portion of code,
2656 * the loopback reference in rt6_info will not be taken, do it
2657 * manually for init_net */
2658 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2659 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2660 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2661 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2662 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2663 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2664 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2668 goto out_register_subsys;
2674 ret = fib6_rules_init();
2679 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2680 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2681 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2682 goto fib6_rules_init;
2684 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2686 goto fib6_rules_init;
2692 fib6_rules_cleanup();
2697 out_register_subsys:
2698 unregister_pernet_subsys(&ip6_route_net_ops);
2700 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2704 void ip6_route_cleanup(void)
2706 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2707 fib6_rules_cleanup();
2710 unregister_pernet_subsys(&ip6_route_net_ops);
2711 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);