[NETNS][IPV6]: Make sysctls route per namespace.
[safe/jmp/linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already have a nexthop
 * (via rt6_alloc_clone()) instead of returning them directly.
 */
#define CLONE_OFFLINK_ROUTE 0
75
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void             ip6_dst_destroy(struct dst_entry *);
80 static void             ip6_dst_ifdown(struct dst_entry *,
81                                        struct net_device *dev, int how);
82 static int               ip6_dst_gc(void);
83
84 static int              ip6_pkt_discard(struct sk_buff *skb);
85 static int              ip6_pkt_discard_out(struct sk_buff *skb);
86 static void             ip6_link_failure(struct sk_buff *skb);
87 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
91                                            struct in6_addr *gwaddr, int ifindex,
92                                            unsigned pref);
93 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
94                                            struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .local_out              =       ip6_local_out,
109         .entry_size             =       sizeof(struct rt6_info),
110 };
111
112 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
113 {
114 }
115
116 static struct dst_ops ip6_dst_blackhole_ops = {
117         .family                 =       AF_INET6,
118         .protocol               =       __constant_htons(ETH_P_IPV6),
119         .destroy                =       ip6_dst_destroy,
120         .check                  =       ip6_dst_check,
121         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
122         .entry_size             =       sizeof(struct rt6_info),
123 };
124
125 struct rt6_info ip6_null_entry = {
126         .u = {
127                 .dst = {
128                         .__refcnt       = ATOMIC_INIT(1),
129                         .__use          = 1,
130                         .obsolete       = -1,
131                         .error          = -ENETUNREACH,
132                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
133                         .input          = ip6_pkt_discard,
134                         .output         = ip6_pkt_discard_out,
135                         .ops            = &ip6_dst_ops,
136                         .path           = (struct dst_entry*)&ip6_null_entry,
137                 }
138         },
139         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
140         .rt6i_metric    = ~(u32) 0,
141         .rt6i_ref       = ATOMIC_INIT(1),
142 };
143
144 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
145
146 static int ip6_pkt_prohibit(struct sk_buff *skb);
147 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
148
149 struct rt6_info ip6_prohibit_entry = {
150         .u = {
151                 .dst = {
152                         .__refcnt       = ATOMIC_INIT(1),
153                         .__use          = 1,
154                         .obsolete       = -1,
155                         .error          = -EACCES,
156                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
157                         .input          = ip6_pkt_prohibit,
158                         .output         = ip6_pkt_prohibit_out,
159                         .ops            = &ip6_dst_ops,
160                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
161                 }
162         },
163         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
164         .rt6i_metric    = ~(u32) 0,
165         .rt6i_ref       = ATOMIC_INIT(1),
166 };
167
168 struct rt6_info ip6_blk_hole_entry = {
169         .u = {
170                 .dst = {
171                         .__refcnt       = ATOMIC_INIT(1),
172                         .__use          = 1,
173                         .obsolete       = -1,
174                         .error          = -EINVAL,
175                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
176                         .input          = dst_discard,
177                         .output         = dst_discard,
178                         .ops            = &ip6_dst_ops,
179                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
180                 }
181         },
182         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
183         .rt6i_metric    = ~(u32) 0,
184         .rt6i_ref       = ATOMIC_INIT(1),
185 };
186
187 #endif
188
189 /* allocate dst with ip6_dst_ops */
190 static __inline__ struct rt6_info *ip6_dst_alloc(void)
191 {
192         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193 }
194
195 static void ip6_dst_destroy(struct dst_entry *dst)
196 {
197         struct rt6_info *rt = (struct rt6_info *)dst;
198         struct inet6_dev *idev = rt->rt6i_idev;
199
200         if (idev != NULL) {
201                 rt->rt6i_idev = NULL;
202                 in6_dev_put(idev);
203         }
204 }
205
206 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207                            int how)
208 {
209         struct rt6_info *rt = (struct rt6_info *)dst;
210         struct inet6_dev *idev = rt->rt6i_idev;
211         struct net_device *loopback_dev =
212                 dev->nd_net->loopback_dev;
213
214         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
215                 struct inet6_dev *loopback_idev =
216                         in6_dev_get(loopback_dev);
217                 if (loopback_idev != NULL) {
218                         rt->rt6i_idev = loopback_idev;
219                         in6_dev_put(idev);
220                 }
221         }
222 }
223
224 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225 {
226         return (rt->rt6i_flags & RTF_EXPIRES &&
227                 time_after(jiffies, rt->rt6i_expires));
228 }
229
230 static inline int rt6_need_strict(struct in6_addr *daddr)
231 {
232         return (ipv6_addr_type(daddr) &
233                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
234 }
235
/*
 *      Route lookup. Callers are expected to hold table->tb6_lock
 *      (read or write).
 */
239
240 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
241                                                     int oif,
242                                                     int strict)
243 {
244         struct rt6_info *local = NULL;
245         struct rt6_info *sprt;
246
247         if (oif) {
248                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249                         struct net_device *dev = sprt->rt6i_dev;
250                         if (dev->ifindex == oif)
251                                 return sprt;
252                         if (dev->flags & IFF_LOOPBACK) {
253                                 if (sprt->rt6i_idev == NULL ||
254                                     sprt->rt6i_idev->dev->ifindex != oif) {
255                                         if (strict && oif)
256                                                 continue;
257                                         if (local && (!oif ||
258                                                       local->rt6i_idev->dev->ifindex == oif))
259                                                 continue;
260                                 }
261                                 local = sprt;
262                         }
263                 }
264
265                 if (local)
266                         return local;
267
268                 if (strict)
269                         return &ip6_null_entry;
270         }
271         return rt;
272 }
273
274 #ifdef CONFIG_IPV6_ROUTER_PREF
275 static void rt6_probe(struct rt6_info *rt)
276 {
277         struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
278         /*
279          * Okay, this does not seem to be appropriate
280          * for now, however, we need to check if it
281          * is really so; aka Router Reachability Probing.
282          *
283          * Router Reachability Probe MUST be rate-limited
284          * to no more than one per minute.
285          */
286         if (!neigh || (neigh->nud_state & NUD_VALID))
287                 return;
288         read_lock_bh(&neigh->lock);
289         if (!(neigh->nud_state & NUD_VALID) &&
290             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
291                 struct in6_addr mcaddr;
292                 struct in6_addr *target;
293
294                 neigh->updated = jiffies;
295                 read_unlock_bh(&neigh->lock);
296
297                 target = (struct in6_addr *)&neigh->primary_key;
298                 addrconf_addr_solict_mult(target, &mcaddr);
299                 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
300         } else
301                 read_unlock_bh(&neigh->lock);
302 }
303 #else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
308 #endif
309
310 /*
311  * Default Router Selection (RFC 2461 6.3.6)
312  */
313 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
314 {
315         struct net_device *dev = rt->rt6i_dev;
316         if (!oif || dev->ifindex == oif)
317                 return 2;
318         if ((dev->flags & IFF_LOOPBACK) &&
319             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
320                 return 1;
321         return 0;
322 }
323
324 static inline int rt6_check_neigh(struct rt6_info *rt)
325 {
326         struct neighbour *neigh = rt->rt6i_nexthop;
327         int m;
328         if (rt->rt6i_flags & RTF_NONEXTHOP ||
329             !(rt->rt6i_flags & RTF_GATEWAY))
330                 m = 1;
331         else if (neigh) {
332                 read_lock_bh(&neigh->lock);
333                 if (neigh->nud_state & NUD_VALID)
334                         m = 2;
335 #ifdef CONFIG_IPV6_ROUTER_PREF
336                 else if (neigh->nud_state & NUD_FAILED)
337                         m = 0;
338 #endif
339                 else
340                         m = 1;
341                 read_unlock_bh(&neigh->lock);
342         } else
343                 m = 0;
344         return m;
345 }
346
347 static int rt6_score_route(struct rt6_info *rt, int oif,
348                            int strict)
349 {
350         int m, n;
351
352         m = rt6_check_dev(rt, oif);
353         if (!m && (strict & RT6_LOOKUP_F_IFACE))
354                 return -1;
355 #ifdef CONFIG_IPV6_ROUTER_PREF
356         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
357 #endif
358         n = rt6_check_neigh(rt);
359         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
360                 return -1;
361         return m;
362 }
363
364 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
365                                    int *mpri, struct rt6_info *match)
366 {
367         int m;
368
369         if (rt6_check_expired(rt))
370                 goto out;
371
372         m = rt6_score_route(rt, oif, strict);
373         if (m < 0)
374                 goto out;
375
376         if (m > *mpri) {
377                 if (strict & RT6_LOOKUP_F_REACHABLE)
378                         rt6_probe(match);
379                 *mpri = m;
380                 match = rt;
381         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
382                 rt6_probe(rt);
383         }
384
385 out:
386         return match;
387 }
388
389 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
390                                      struct rt6_info *rr_head,
391                                      u32 metric, int oif, int strict)
392 {
393         struct rt6_info *rt, *match;
394         int mpri = -1;
395
396         match = NULL;
397         for (rt = rr_head; rt && rt->rt6i_metric == metric;
398              rt = rt->u.dst.rt6_next)
399                 match = find_match(rt, oif, strict, &mpri, match);
400         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
401              rt = rt->u.dst.rt6_next)
402                 match = find_match(rt, oif, strict, &mpri, match);
403
404         return match;
405 }
406
407 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
408 {
409         struct rt6_info *match, *rt0;
410
411         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
412                   __FUNCTION__, fn->leaf, oif);
413
414         rt0 = fn->rr_ptr;
415         if (!rt0)
416                 fn->rr_ptr = rt0 = fn->leaf;
417
418         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
419
420         if (!match &&
421             (strict & RT6_LOOKUP_F_REACHABLE)) {
422                 struct rt6_info *next = rt0->u.dst.rt6_next;
423
424                 /* no entries matched; do round-robin */
425                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
426                         next = fn->leaf;
427
428                 if (next != rt0)
429                         fn->rr_ptr = next;
430         }
431
432         RT6_TRACE("%s() => %p\n",
433                   __FUNCTION__, match);
434
435         return (match ? match : &ip6_null_entry);
436 }
437
438 #ifdef CONFIG_IPV6_ROUTE_INFO
439 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
440                   struct in6_addr *gwaddr)
441 {
442         struct route_info *rinfo = (struct route_info *) opt;
443         struct in6_addr prefix_buf, *prefix;
444         unsigned int pref;
445         u32 lifetime;
446         struct rt6_info *rt;
447
448         if (len < sizeof(struct route_info)) {
449                 return -EINVAL;
450         }
451
452         /* Sanity check for prefix_len and length */
453         if (rinfo->length > 3) {
454                 return -EINVAL;
455         } else if (rinfo->prefix_len > 128) {
456                 return -EINVAL;
457         } else if (rinfo->prefix_len > 64) {
458                 if (rinfo->length < 2) {
459                         return -EINVAL;
460                 }
461         } else if (rinfo->prefix_len > 0) {
462                 if (rinfo->length < 1) {
463                         return -EINVAL;
464                 }
465         }
466
467         pref = rinfo->route_pref;
468         if (pref == ICMPV6_ROUTER_PREF_INVALID)
469                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
470
471         lifetime = ntohl(rinfo->lifetime);
472         if (lifetime == 0xffffffff) {
473                 /* infinity */
474         } else if (lifetime > 0x7fffffff/HZ) {
475                 /* Avoid arithmetic overflow */
476                 lifetime = 0x7fffffff/HZ - 1;
477         }
478
479         if (rinfo->length == 3)
480                 prefix = (struct in6_addr *)rinfo->prefix;
481         else {
482                 /* this function is safe */
483                 ipv6_addr_prefix(&prefix_buf,
484                                  (struct in6_addr *)rinfo->prefix,
485                                  rinfo->prefix_len);
486                 prefix = &prefix_buf;
487         }
488
489         rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
490
491         if (rt && !lifetime) {
492                 ip6_del_rt(rt);
493                 rt = NULL;
494         }
495
496         if (!rt && lifetime)
497                 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
498                                         pref);
499         else if (rt)
500                 rt->rt6i_flags = RTF_ROUTEINFO |
501                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
502
503         if (rt) {
504                 if (lifetime == 0xffffffff) {
505                         rt->rt6i_flags &= ~RTF_EXPIRES;
506                 } else {
507                         rt->rt6i_expires = jiffies + HZ * lifetime;
508                         rt->rt6i_flags |= RTF_EXPIRES;
509                 }
510                 dst_release(&rt->u.dst);
511         }
512         return 0;
513 }
514 #endif
515
/*
 * BACKTRACK(saddr) - climb the fib6 tree when the lookup produced
 * &ip6_null_entry.
 *
 * Requirements on the expanding function: local variables `rt` and `fn`,
 * and labels `out` and `restart`.  Starting at `fn`, the macro moves to the
 * parent node (or into the parent's source subtree, looked up with @saddr,
 * when one exists and we did not come from it).  It jumps to `restart` at
 * the first node carrying RTN_RTINFO, or to `out` once the top-level root
 * is reached.  When `rt` is anything other than &ip6_null_entry the macro
 * does nothing.
 */
#define BACKTRACK(saddr) \
do { \
        struct fib6_node *pn; \
        if (rt != &ip6_null_entry) \
                break; \
        for (;;) { \
                if (fn->fn_flags & RTN_TL_ROOT) \
                        goto out; \
                pn = fn->parent; \
                if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
                        fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
                else \
                        fn = pn; \
                if (fn->fn_flags & RTN_RTINFO) \
                        goto restart; \
        } \
} while (0)
533
534 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
535                                              struct flowi *fl, int flags)
536 {
537         struct fib6_node *fn;
538         struct rt6_info *rt;
539
540         read_lock_bh(&table->tb6_lock);
541         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
542 restart:
543         rt = fn->leaf;
544         rt = rt6_device_match(rt, fl->oif, flags);
545         BACKTRACK(&fl->fl6_src);
546 out:
547         dst_use(&rt->u.dst, jiffies);
548         read_unlock_bh(&table->tb6_lock);
549         return rt;
550
551 }
552
553 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
554                             int oif, int strict)
555 {
556         struct flowi fl = {
557                 .oif = oif,
558                 .nl_u = {
559                         .ip6_u = {
560                                 .daddr = *daddr,
561                         },
562                 },
563         };
564         struct dst_entry *dst;
565         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
566
567         if (saddr) {
568                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
569                 flags |= RT6_LOOKUP_F_HAS_SADDR;
570         }
571
572         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
573         if (dst->error == 0)
574                 return (struct rt6_info *) dst;
575
576         dst_release(dst);
577
578         return NULL;
579 }
580
581 EXPORT_SYMBOL(rt6_lookup);
582
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
588
589 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
590 {
591         int err;
592         struct fib6_table *table;
593
594         table = rt->rt6i_table;
595         write_lock_bh(&table->tb6_lock);
596         err = fib6_add(&table->tb6_root, rt, info);
597         write_unlock_bh(&table->tb6_lock);
598
599         return err;
600 }
601
602 int ip6_ins_rt(struct rt6_info *rt)
603 {
604         struct nl_info info = {};
605         return __ip6_ins_rt(rt, &info);
606 }
607
608 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
609                                       struct in6_addr *saddr)
610 {
611         struct rt6_info *rt;
612
613         /*
614          *      Clone the route.
615          */
616
617         rt = ip6_rt_copy(ort);
618
619         if (rt) {
620                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
621                         if (rt->rt6i_dst.plen != 128 &&
622                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
623                                 rt->rt6i_flags |= RTF_ANYCAST;
624                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
625                 }
626
627                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
628                 rt->rt6i_dst.plen = 128;
629                 rt->rt6i_flags |= RTF_CACHE;
630                 rt->u.dst.flags |= DST_HOST;
631
632 #ifdef CONFIG_IPV6_SUBTREES
633                 if (rt->rt6i_src.plen && saddr) {
634                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
635                         rt->rt6i_src.plen = 128;
636                 }
637 #endif
638
639                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
640
641         }
642
643         return rt;
644 }
645
646 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
647 {
648         struct rt6_info *rt = ip6_rt_copy(ort);
649         if (rt) {
650                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
651                 rt->rt6i_dst.plen = 128;
652                 rt->rt6i_flags |= RTF_CACHE;
653                 rt->u.dst.flags |= DST_HOST;
654                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
655         }
656         return rt;
657 }
658
659 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
660                                             struct flowi *fl, int flags)
661 {
662         struct fib6_node *fn;
663         struct rt6_info *rt, *nrt;
664         int strict = 0;
665         int attempts = 3;
666         int err;
667         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
668
669         strict |= flags & RT6_LOOKUP_F_IFACE;
670
671 relookup:
672         read_lock_bh(&table->tb6_lock);
673
674 restart_2:
675         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
676
677 restart:
678         rt = rt6_select(fn, oif, strict | reachable);
679         BACKTRACK(&fl->fl6_src);
680         if (rt == &ip6_null_entry ||
681             rt->rt6i_flags & RTF_CACHE)
682                 goto out;
683
684         dst_hold(&rt->u.dst);
685         read_unlock_bh(&table->tb6_lock);
686
687         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
688                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
689         else {
690 #if CLONE_OFFLINK_ROUTE
691                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
692 #else
693                 goto out2;
694 #endif
695         }
696
697         dst_release(&rt->u.dst);
698         rt = nrt ? : &ip6_null_entry;
699
700         dst_hold(&rt->u.dst);
701         if (nrt) {
702                 err = ip6_ins_rt(nrt);
703                 if (!err)
704                         goto out2;
705         }
706
707         if (--attempts <= 0)
708                 goto out2;
709
710         /*
711          * Race condition! In the gap, when table->tb6_lock was
712          * released someone could insert this route.  Relookup.
713          */
714         dst_release(&rt->u.dst);
715         goto relookup;
716
717 out:
718         if (reachable) {
719                 reachable = 0;
720                 goto restart_2;
721         }
722         dst_hold(&rt->u.dst);
723         read_unlock_bh(&table->tb6_lock);
724 out2:
725         rt->u.dst.lastuse = jiffies;
726         rt->u.dst.__use++;
727
728         return rt;
729 }
730
731 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
732                                             struct flowi *fl, int flags)
733 {
734         return ip6_pol_route(table, fl->iif, fl, flags);
735 }
736
737 void ip6_route_input(struct sk_buff *skb)
738 {
739         struct ipv6hdr *iph = ipv6_hdr(skb);
740         int flags = RT6_LOOKUP_F_HAS_SADDR;
741         struct flowi fl = {
742                 .iif = skb->dev->ifindex,
743                 .nl_u = {
744                         .ip6_u = {
745                                 .daddr = iph->daddr,
746                                 .saddr = iph->saddr,
747                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
748                         },
749                 },
750                 .mark = skb->mark,
751                 .proto = iph->nexthdr,
752         };
753
754         if (rt6_need_strict(&iph->daddr))
755                 flags |= RT6_LOOKUP_F_IFACE;
756
757         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
758 }
759
760 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
761                                              struct flowi *fl, int flags)
762 {
763         return ip6_pol_route(table, fl->oif, fl, flags);
764 }
765
766 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
767 {
768         int flags = 0;
769
770         if (rt6_need_strict(&fl->fl6_dst))
771                 flags |= RT6_LOOKUP_F_IFACE;
772
773         if (!ipv6_addr_any(&fl->fl6_src))
774                 flags |= RT6_LOOKUP_F_HAS_SADDR;
775
776         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
777 }
778
779 EXPORT_SYMBOL(ip6_route_output);
780
781 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
782 {
783         struct rt6_info *ort = (struct rt6_info *) *dstp;
784         struct rt6_info *rt = (struct rt6_info *)
785                 dst_alloc(&ip6_dst_blackhole_ops);
786         struct dst_entry *new = NULL;
787
788         if (rt) {
789                 new = &rt->u.dst;
790
791                 atomic_set(&new->__refcnt, 1);
792                 new->__use = 1;
793                 new->input = dst_discard;
794                 new->output = dst_discard;
795
796                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
797                 new->dev = ort->u.dst.dev;
798                 if (new->dev)
799                         dev_hold(new->dev);
800                 rt->rt6i_idev = ort->rt6i_idev;
801                 if (rt->rt6i_idev)
802                         in6_dev_hold(rt->rt6i_idev);
803                 rt->rt6i_expires = 0;
804
805                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
806                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
807                 rt->rt6i_metric = 0;
808
809                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
810 #ifdef CONFIG_IPV6_SUBTREES
811                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
812 #endif
813
814                 dst_free(new);
815         }
816
817         dst_release(*dstp);
818         *dstp = new;
819         return (new ? 0 : -ENOMEM);
820 }
821 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
822
823 /*
824  *      Destination cache support functions
825  */
826
827 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
828 {
829         struct rt6_info *rt;
830
831         rt = (struct rt6_info *) dst;
832
833         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
834                 return dst;
835
836         return NULL;
837 }
838
839 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
840 {
841         struct rt6_info *rt = (struct rt6_info *) dst;
842
843         if (rt) {
844                 if (rt->rt6i_flags & RTF_CACHE)
845                         ip6_del_rt(rt);
846                 else
847                         dst_release(dst);
848         }
849         return NULL;
850 }
851
852 static void ip6_link_failure(struct sk_buff *skb)
853 {
854         struct rt6_info *rt;
855
856         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
857
858         rt = (struct rt6_info *) skb->dst;
859         if (rt) {
860                 if (rt->rt6i_flags&RTF_CACHE) {
861                         dst_set_expires(&rt->u.dst, 0);
862                         rt->rt6i_flags |= RTF_EXPIRES;
863                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
864                         rt->rt6i_node->fn_sernum = -1;
865         }
866 }
867
868 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
869 {
870         struct rt6_info *rt6 = (struct rt6_info*)dst;
871
872         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
873                 rt6->rt6i_flags |= RTF_MODIFIED;
874                 if (mtu < IPV6_MIN_MTU) {
875                         mtu = IPV6_MIN_MTU;
876                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
877                 }
878                 dst->metrics[RTAX_MTU-1] = mtu;
879                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
880         }
881 }
882
883 static int ipv6_get_mtu(struct net_device *dev);
884
885 static inline unsigned int ipv6_advmss(unsigned int mtu)
886 {
887         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
888
889         if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
890                 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
891
892         /*
893          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
894          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
895          * IPV6_MAXPLEN is also valid and means: "any MSS,
896          * rely only on pmtu discovery"
897          */
898         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
899                 mtu = IPV6_MAXPLEN;
900         return mtu;
901 }
902
903 static struct dst_entry *ndisc_dst_gc_list;
904 static DEFINE_SPINLOCK(ndisc_lock);
905
906 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
907                                   struct neighbour *neigh,
908                                   struct in6_addr *addr,
909                                   int (*output)(struct sk_buff *))
910 {
911         struct rt6_info *rt;
912         struct inet6_dev *idev = in6_dev_get(dev);
913
914         if (unlikely(idev == NULL))
915                 return NULL;
916
917         rt = ip6_dst_alloc();
918         if (unlikely(rt == NULL)) {
919                 in6_dev_put(idev);
920                 goto out;
921         }
922
923         dev_hold(dev);
924         if (neigh)
925                 neigh_hold(neigh);
926         else
927                 neigh = ndisc_get_neigh(dev, addr);
928
929         rt->rt6i_dev      = dev;
930         rt->rt6i_idev     = idev;
931         rt->rt6i_nexthop  = neigh;
932         atomic_set(&rt->u.dst.__refcnt, 1);
933         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
934         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
935         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
936         rt->u.dst.output  = output;
937
938 #if 0   /* there's no chance to use these for ndisc */
939         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
940                                 ? DST_HOST
941                                 : 0;
942         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
943         rt->rt6i_dst.plen = 128;
944 #endif
945
946         spin_lock_bh(&ndisc_lock);
947         rt->u.dst.next = ndisc_dst_gc_list;
948         ndisc_dst_gc_list = &rt->u.dst;
949         spin_unlock_bh(&ndisc_lock);
950
951         fib6_force_start_gc();
952
953 out:
954         return &rt->u.dst;
955 }
956
957 int ndisc_dst_gc(int *more)
958 {
959         struct dst_entry *dst, *next, **pprev;
960         int freed;
961
962         next = NULL;
963         freed = 0;
964
965         spin_lock_bh(&ndisc_lock);
966         pprev = &ndisc_dst_gc_list;
967
968         while ((dst = *pprev) != NULL) {
969                 if (!atomic_read(&dst->__refcnt)) {
970                         *pprev = dst->next;
971                         dst_free(dst);
972                         freed++;
973                 } else {
974                         pprev = &dst->next;
975                         (*more)++;
976                 }
977         }
978
979         spin_unlock_bh(&ndisc_lock);
980
981         return freed;
982 }
983
984 static int ip6_dst_gc(void)
985 {
986         static unsigned expire = 30*HZ;
987         static unsigned long last_gc;
988         unsigned long now = jiffies;
989
990         if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
991             atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
992                 goto out;
993
994         expire++;
995         fib6_run_gc(expire);
996         last_gc = now;
997         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
998                 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
999
1000 out:
1001         expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1002         return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1003 }
1004
1005 /* Clean host part of a prefix. Not necessary in radix tree,
1006    but results in cleaner routing tables.
1007
1008    Remove it only when all the things will work!
1009  */
1010
1011 static int ipv6_get_mtu(struct net_device *dev)
1012 {
1013         int mtu = IPV6_MIN_MTU;
1014         struct inet6_dev *idev;
1015
1016         idev = in6_dev_get(dev);
1017         if (idev) {
1018                 mtu = idev->cnf.mtu6;
1019                 in6_dev_put(idev);
1020         }
1021         return mtu;
1022 }
1023
1024 int ipv6_get_hoplimit(struct net_device *dev)
1025 {
1026         int hoplimit = ipv6_devconf.hop_limit;
1027         struct inet6_dev *idev;
1028
1029         idev = in6_dev_get(dev);
1030         if (idev) {
1031                 hoplimit = idev->cnf.hop_limit;
1032                 in6_dev_put(idev);
1033         }
1034         return hoplimit;
1035 }
1036
1037 /*
1038  *
1039  */
1040
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Validates the prefix lengths, resolves the output device (from
 * cfg->fc_ifindex, from the route towards the gateway, or the loopback
 * device for reject routes), fills in a freshly allocated rt6_info and
 * hands it to __ip6_ins_rt().
 *
 * Returns the result of __ip6_ins_rt() once the route is fully set up, or
 * a negative errno if setup fails. On the failure path ("out") the dev and
 * idev references held at that point are dropped and the half-built route
 * is freed with dst_free().
 *
 * Note: cfg->fc_metric and cfg->fc_protocol are overwritten with defaults
 * (IP6_RT_PRIO_USER / RTPROT_BOOT) when passed in as 0 / RTPROT_UNSPEC.
 */
1041 int ip6_route_add(struct fib6_config *cfg)
1042 {
1043         int err;
1044         struct rt6_info *rt = NULL;
1045         struct net_device *dev = NULL;
1046         struct inet6_dev *idev = NULL;
1047         struct fib6_table *table;
1048         int addr_type;
1049
1050         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1051                 return -EINVAL;
1052 #ifndef CONFIG_IPV6_SUBTREES
1053         if (cfg->fc_src_len)
1054                 return -EINVAL;
1055 #endif
             /* Explicit interface: take a reference on both dev and idev. */
1056         if (cfg->fc_ifindex) {
1057                 err = -ENODEV;
1058                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1059                 if (!dev)
1060                         goto out;
1061                 idev = in6_dev_get(dev);
1062                 if (!idev)
1063                         goto out;
1064         }
1065
1066         if (cfg->fc_metric == 0)
1067                 cfg->fc_metric = IP6_RT_PRIO_USER;
1068
1069         table = fib6_new_table(cfg->fc_table);
1070         if (table == NULL) {
1071                 err = -ENOBUFS;
1072                 goto out;
1073         }
1074
1075         rt = ip6_dst_alloc();
1076
1077         if (rt == NULL) {
1078                 err = -ENOMEM;
1079                 goto out;
1080         }
1081
1082         rt->u.dst.obsolete = -1;
1083         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1084
1085         if (cfg->fc_protocol == RTPROT_UNSPEC)
1086                 cfg->fc_protocol = RTPROT_BOOT;
1087         rt->rt6i_protocol = cfg->fc_protocol;
1088
1089         addr_type = ipv6_addr_type(&cfg->fc_dst);
1090
1091         if (addr_type & IPV6_ADDR_MULTICAST)
1092                 rt->u.dst.input = ip6_mc_input;
1093         else
1094                 rt->u.dst.input = ip6_forward;
1095
1096         rt->u.dst.output = ip6_output;
1097
1098         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1099         rt->rt6i_dst.plen = cfg->fc_dst_len;
1100         if (rt->rt6i_dst.plen == 128)
1101                rt->u.dst.flags = DST_HOST;
1102
1103 #ifdef CONFIG_IPV6_SUBTREES
1104         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1105         rt->rt6i_src.plen = cfg->fc_src_len;
1106 #endif
1107
1108         rt->rt6i_metric = cfg->fc_metric;
1109
1110         /* We cannot add true routes via loopback here,
1111            they would result in kernel looping; promote them to reject routes
1112          */
1113         if ((cfg->fc_flags & RTF_REJECT) ||
1114             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1115                 /* hold loopback dev/idev if we haven't done so. */
1116                 if (dev != init_net.loopback_dev) {
1117                         if (dev) {
1118                                 dev_put(dev);
1119                                 in6_dev_put(idev);
1120                         }
1121                         dev = init_net.loopback_dev;
1122                         dev_hold(dev);
1123                         idev = in6_dev_get(dev);
1124                         if (!idev) {
1125                                 err = -ENODEV;
1126                                 goto out;
1127                         }
1128                 }
                     /* Reject routes discard in both directions and skip the gateway/neighbour setup below. */
1129                 rt->u.dst.output = ip6_pkt_discard_out;
1130                 rt->u.dst.input = ip6_pkt_discard;
1131                 rt->u.dst.error = -ENETUNREACH;
1132                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1133                 goto install_route;
1134         }
1135
1136         if (cfg->fc_flags & RTF_GATEWAY) {
1137                 struct in6_addr *gw_addr;
1138                 int gwa_type;
1139
1140                 gw_addr = &cfg->fc_gateway;
1141                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1142                 gwa_type = ipv6_addr_type(gw_addr);
1143
1144                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1145                         struct rt6_info *grt;
1146
1147                         /* IPv6 strictly inhibits using not link-local
1148                            addresses as nexthop address.
1149                            Otherwise, router will not able to send redirects.
1150                            It is very good, but in some (rare!) circumstances
1151                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1152                            some exceptions. --ANK
1153                          */
1154                         err = -EINVAL;
1155                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1156                                 goto out;
1157
1158                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1159
1160                         err = -EHOSTUNREACH;
1161                         if (grt == NULL)
1162                                 goto out;
1163                         if (dev) {
1164                                 if (dev != grt->rt6i_dev) {
1165                                         dst_release(&grt->u.dst);
1166                                         goto out;
1167                                 }
1168                         } else {
                                     /* No interface given: adopt the one used to reach the gateway. */
1169                                 dev = grt->rt6i_dev;
1170                                 idev = grt->rt6i_idev;
1171                                 dev_hold(dev);
1172                                 in6_dev_hold(grt->rt6i_idev);
1173                         }
                             /* The gateway's own route must not be a gateway route; otherwise err stays -EHOSTUNREACH. */
1174                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1175                                 err = 0;
1176                         dst_release(&grt->u.dst);
1177
1178                         if (err)
1179                                 goto out;
1180                 }
1181                 err = -EINVAL;
1182                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1183                         goto out;
1184         }
1185
1186         err = -ENODEV;
1187         if (dev == NULL)
1188                 goto out;
1189
1190         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1191                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1192                 if (IS_ERR(rt->rt6i_nexthop)) {
1193                         err = PTR_ERR(rt->rt6i_nexthop);
1194                         rt->rt6i_nexthop = NULL;
1195                         goto out;
1196                 }
1197         }
1198
1199         rt->rt6i_flags = cfg->fc_flags;
1200
1201 install_route:
             /* Copy caller-supplied metrics: attribute type N is stored at metrics[N - 1]. */
1202         if (cfg->fc_mx) {
1203                 struct nlattr *nla;
1204                 int remaining;
1205
1206                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1207                         int type = nla_type(nla);
1208
1209                         if (type) {
1210                                 if (type > RTAX_MAX) {
1211                                         err = -EINVAL;
1212                                         goto out;
1213                                 }
1214
1215                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1216                         }
1217                 }
1218         }
1219
             /* Fill in defaults for the metrics that are still zero. */
1220         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1221                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1222         if (!rt->u.dst.metrics[RTAX_MTU-1])
1223                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1224         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1225                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
             /* The dev/idev references held here are not dropped on this path; the pointers are stored in the route. */
1226         rt->u.dst.dev = dev;
1227         rt->rt6i_idev = idev;
1228         rt->rt6i_table = table;
1229         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1230
1231 out:
1232         if (dev)
1233                 dev_put(dev);
1234         if (idev)
1235                 in6_dev_put(idev);
1236         if (rt)
1237                 dst_free(&rt->u.dst);
1238         return err;
1239 }
1240
1241 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1242 {
1243         int err;
1244         struct fib6_table *table;
1245
1246         if (rt == &ip6_null_entry)
1247                 return -ENOENT;
1248
1249         table = rt->rt6i_table;
1250         write_lock_bh(&table->tb6_lock);
1251
1252         err = fib6_del(rt, info);
1253         dst_release(&rt->u.dst);
1254
1255         write_unlock_bh(&table->tb6_lock);
1256
1257         return err;
1258 }
1259
1260 int ip6_del_rt(struct rt6_info *rt)
1261 {
1262         struct nl_info info = {};
1263         return __ip6_del_rt(rt, &info);
1264 }
1265
1266 static int ip6_route_del(struct fib6_config *cfg)
1267 {
1268         struct fib6_table *table;
1269         struct fib6_node *fn;
1270         struct rt6_info *rt;
1271         int err = -ESRCH;
1272
1273         table = fib6_get_table(cfg->fc_table);
1274         if (table == NULL)
1275                 return err;
1276
1277         read_lock_bh(&table->tb6_lock);
1278
1279         fn = fib6_locate(&table->tb6_root,
1280                          &cfg->fc_dst, cfg->fc_dst_len,
1281                          &cfg->fc_src, cfg->fc_src_len);
1282
1283         if (fn) {
1284                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1285                         if (cfg->fc_ifindex &&
1286                             (rt->rt6i_dev == NULL ||
1287                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1288                                 continue;
1289                         if (cfg->fc_flags & RTF_GATEWAY &&
1290                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1291                                 continue;
1292                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1293                                 continue;
1294                         dst_hold(&rt->u.dst);
1295                         read_unlock_bh(&table->tb6_lock);
1296
1297                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1298                 }
1299         }
1300         read_unlock_bh(&table->tb6_lock);
1301
1302         return err;
1303 }
1304
1305 /*
1306  *      Handle redirects
1307  */
/*
 * Flow key for redirect lookups: a regular flowi plus the address of the
 * router that sent the redirect. 'fl' has to stay the first member, since
 * the structure is passed around as a struct flowi * and cast back in
 * __ip6_route_redirect().
 */
1308 struct ip6rd_flowi {
1309         struct flowi fl;
1310         struct in6_addr gateway;
1311 };
1312
1313 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1314                                              struct flowi *fl,
1315                                              int flags)
1316 {
1317         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1318         struct rt6_info *rt;
1319         struct fib6_node *fn;
1320
1321         /*
1322          * Get the "current" route for this destination and
1323          * check if the redirect has come from approriate router.
1324          *
1325          * RFC 2461 specifies that redirects should only be
1326          * accepted if they come from the nexthop to the target.
1327          * Due to the way the routes are chosen, this notion
1328          * is a bit fuzzy and one might need to check all possible
1329          * routes.
1330          */
1331
1332         read_lock_bh(&table->tb6_lock);
1333         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1334 restart:
1335         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1336                 /*
1337                  * Current route is on-link; redirect is always invalid.
1338                  *
1339                  * Seems, previous statement is not true. It could
1340                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1341                  * But then router serving it might decide, that we should
1342                  * know truth 8)8) --ANK (980726).
1343                  */
1344                 if (rt6_check_expired(rt))
1345                         continue;
1346                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1347                         continue;
1348                 if (fl->oif != rt->rt6i_dev->ifindex)
1349                         continue;
1350                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1351                         continue;
1352                 break;
1353         }
1354
1355         if (!rt)
1356                 rt = &ip6_null_entry;
1357         BACKTRACK(&fl->fl6_src);
1358 out:
1359         dst_hold(&rt->u.dst);
1360
1361         read_unlock_bh(&table->tb6_lock);
1362
1363         return rt;
1364 };
1365
1366 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1367                                            struct in6_addr *src,
1368                                            struct in6_addr *gateway,
1369                                            struct net_device *dev)
1370 {
1371         int flags = RT6_LOOKUP_F_HAS_SADDR;
1372         struct ip6rd_flowi rdfl = {
1373                 .fl = {
1374                         .oif = dev->ifindex,
1375                         .nl_u = {
1376                                 .ip6_u = {
1377                                         .daddr = *dest,
1378                                         .saddr = *src,
1379                                 },
1380                         },
1381                 },
1382                 .gateway = *gateway,
1383         };
1384
1385         if (rt6_need_strict(dest))
1386                 flags |= RT6_LOOKUP_F_IFACE;
1387
1388         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1389 }
1390
1391 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1392                   struct in6_addr *saddr,
1393                   struct neighbour *neigh, u8 *lladdr, int on_link)
1394 {
1395         struct rt6_info *rt, *nrt = NULL;
1396         struct netevent_redirect netevent;
1397
1398         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1399
1400         if (rt == &ip6_null_entry) {
1401                 if (net_ratelimit())
1402                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1403                                "for redirect target\n");
1404                 goto out;
1405         }
1406
1407         /*
1408          *      We have finally decided to accept it.
1409          */
1410
1411         neigh_update(neigh, lladdr, NUD_STALE,
1412                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1413                      NEIGH_UPDATE_F_OVERRIDE|
1414                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1415                                      NEIGH_UPDATE_F_ISROUTER))
1416                      );
1417
1418         /*
1419          * Redirect received -> path was valid.
1420          * Look, redirects are sent only in response to data packets,
1421          * so that this nexthop apparently is reachable. --ANK
1422          */
1423         dst_confirm(&rt->u.dst);
1424
1425         /* Duplicate redirect: silently ignore. */
1426         if (neigh == rt->u.dst.neighbour)
1427                 goto out;
1428
1429         nrt = ip6_rt_copy(rt);
1430         if (nrt == NULL)
1431                 goto out;
1432
1433         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1434         if (on_link)
1435                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1436
1437         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1438         nrt->rt6i_dst.plen = 128;
1439         nrt->u.dst.flags |= DST_HOST;
1440
1441         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1442         nrt->rt6i_nexthop = neigh_clone(neigh);
1443         /* Reset pmtu, it may be better */
1444         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1445         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1446
1447         if (ip6_ins_rt(nrt))
1448                 goto out;
1449
1450         netevent.old = &rt->u.dst;
1451         netevent.new = &nrt->u.dst;
1452         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1453
1454         if (rt->rt6i_flags&RTF_CACHE) {
1455                 ip6_del_rt(rt);
1456                 return;
1457         }
1458
1459 out:
1460         dst_release(&rt->u.dst);
1461         return;
1462 }
1463
1464 /*
1465  *      Handle ICMP "packet too big" messages
1466  *      i.e. Path MTU discovery
1467  */
1468
1469 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1470                         struct net_device *dev, u32 pmtu)
1471 {
1472         struct rt6_info *rt, *nrt;
1473         int allfrag = 0;
1474
1475         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1476         if (rt == NULL)
1477                 return;
1478
1479         if (pmtu >= dst_mtu(&rt->u.dst))
1480                 goto out;
1481
1482         if (pmtu < IPV6_MIN_MTU) {
1483                 /*
1484                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1485                  * MTU (1280) and a fragment header should always be included
1486                  * after a node receiving Too Big message reporting PMTU is
1487                  * less than the IPv6 Minimum Link MTU.
1488                  */
1489                 pmtu = IPV6_MIN_MTU;
1490                 allfrag = 1;
1491         }
1492
1493         /* New mtu received -> path was valid.
1494            They are sent only in response to data packets,
1495            so that this nexthop apparently is reachable. --ANK
1496          */
1497         dst_confirm(&rt->u.dst);
1498
1499         /* Host route. If it is static, it would be better
1500            not to override it, but add new one, so that
1501            when cache entry will expire old pmtu
1502            would return automatically.
1503          */
1504         if (rt->rt6i_flags & RTF_CACHE) {
1505                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1506                 if (allfrag)
1507                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1508                 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1509                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1510                 goto out;
1511         }
1512
1513         /* Network route.
1514            Two cases are possible:
1515            1. It is connected route. Action: COW
1516            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1517          */
1518         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1519                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1520         else
1521                 nrt = rt6_alloc_clone(rt, daddr);
1522
1523         if (nrt) {
1524                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1525                 if (allfrag)
1526                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1527
1528                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1529                  * happened within 5 mins, the recommended timer is 10 mins.
1530                  * Here this route expiration time is set to ip6_rt_mtu_expires
1531                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1532                  * and detecting PMTU increase will be automatically happened.
1533                  */
1534                 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1535                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1536
1537                 ip6_ins_rt(nrt);
1538         }
1539 out:
1540         dst_release(&rt->u.dst);
1541 }
1542
1543 /*
1544  *      Misc support functions
1545  */
1546
1547 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1548 {
1549         struct rt6_info *rt = ip6_dst_alloc();
1550
1551         if (rt) {
1552                 rt->u.dst.input = ort->u.dst.input;
1553                 rt->u.dst.output = ort->u.dst.output;
1554
1555                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1556                 rt->u.dst.error = ort->u.dst.error;
1557                 rt->u.dst.dev = ort->u.dst.dev;
1558                 if (rt->u.dst.dev)
1559                         dev_hold(rt->u.dst.dev);
1560                 rt->rt6i_idev = ort->rt6i_idev;
1561                 if (rt->rt6i_idev)
1562                         in6_dev_hold(rt->rt6i_idev);
1563                 rt->u.dst.lastuse = jiffies;
1564                 rt->rt6i_expires = 0;
1565
1566                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1567                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1568                 rt->rt6i_metric = 0;
1569
1570                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1571 #ifdef CONFIG_IPV6_SUBTREES
1572                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1573 #endif
1574                 rt->rt6i_table = ort->rt6i_table;
1575         }
1576         return rt;
1577 }
1578
1579 #ifdef CONFIG_IPV6_ROUTE_INFO
1580 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1581                                            struct in6_addr *gwaddr, int ifindex)
1582 {
1583         struct fib6_node *fn;
1584         struct rt6_info *rt = NULL;
1585         struct fib6_table *table;
1586
1587         table = fib6_get_table(RT6_TABLE_INFO);
1588         if (table == NULL)
1589                 return NULL;
1590
1591         write_lock_bh(&table->tb6_lock);
1592         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1593         if (!fn)
1594                 goto out;
1595
1596         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1597                 if (rt->rt6i_dev->ifindex != ifindex)
1598                         continue;
1599                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1600                         continue;
1601                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1602                         continue;
1603                 dst_hold(&rt->u.dst);
1604                 break;
1605         }
1606 out:
1607         write_unlock_bh(&table->tb6_lock);
1608         return rt;
1609 }
1610
1611 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1612                                            struct in6_addr *gwaddr, int ifindex,
1613                                            unsigned pref)
1614 {
1615         struct fib6_config cfg = {
1616                 .fc_table       = RT6_TABLE_INFO,
1617                 .fc_metric      = 1024,
1618                 .fc_ifindex     = ifindex,
1619                 .fc_dst_len     = prefixlen,
1620                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1621                                   RTF_UP | RTF_PREF(pref),
1622         };
1623
1624         ipv6_addr_copy(&cfg.fc_dst, prefix);
1625         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1626
1627         /* We should treat it as a default route if prefix length is 0. */
1628         if (!prefixlen)
1629                 cfg.fc_flags |= RTF_DEFAULT;
1630
1631         ip6_route_add(&cfg);
1632
1633         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1634 }
1635 #endif
1636
1637 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1638 {
1639         struct rt6_info *rt;
1640         struct fib6_table *table;
1641
1642         table = fib6_get_table(RT6_TABLE_DFLT);
1643         if (table == NULL)
1644                 return NULL;
1645
1646         write_lock_bh(&table->tb6_lock);
1647         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1648                 if (dev == rt->rt6i_dev &&
1649                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1650                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1651                         break;
1652         }
1653         if (rt)
1654                 dst_hold(&rt->u.dst);
1655         write_unlock_bh(&table->tb6_lock);
1656         return rt;
1657 }
1658
1659 EXPORT_SYMBOL(rt6_get_dflt_router);
1660
1661 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1662                                      struct net_device *dev,
1663                                      unsigned int pref)
1664 {
1665         struct fib6_config cfg = {
1666                 .fc_table       = RT6_TABLE_DFLT,
1667                 .fc_metric      = 1024,
1668                 .fc_ifindex     = dev->ifindex,
1669                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1670                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1671         };
1672
1673         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1674
1675         ip6_route_add(&cfg);
1676
1677         return rt6_get_dflt_router(gwaddr, dev);
1678 }
1679
1680 void rt6_purge_dflt_routers(void)
1681 {
1682         struct rt6_info *rt;
1683         struct fib6_table *table;
1684
1685         /* NOTE: Keep consistent with rt6_get_dflt_router */
1686         table = fib6_get_table(RT6_TABLE_DFLT);
1687         if (table == NULL)
1688                 return;
1689
1690 restart:
1691         read_lock_bh(&table->tb6_lock);
1692         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1693                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1694                         dst_hold(&rt->u.dst);
1695                         read_unlock_bh(&table->tb6_lock);
1696                         ip6_del_rt(rt);
1697                         goto restart;
1698                 }
1699         }
1700         read_unlock_bh(&table->tb6_lock);
1701 }
1702
1703 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1704                                  struct fib6_config *cfg)
1705 {
1706         memset(cfg, 0, sizeof(*cfg));
1707
1708         cfg->fc_table = RT6_TABLE_MAIN;
1709         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1710         cfg->fc_metric = rtmsg->rtmsg_metric;
1711         cfg->fc_expires = rtmsg->rtmsg_info;
1712         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1713         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1714         cfg->fc_flags = rtmsg->rtmsg_flags;
1715
1716         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1717         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1718         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1719 }
1720
1721 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1722 {
1723         struct fib6_config cfg;
1724         struct in6_rtmsg rtmsg;
1725         int err;
1726
1727         switch(cmd) {
1728         case SIOCADDRT:         /* Add a route */
1729         case SIOCDELRT:         /* Delete a route */
1730                 if (!capable(CAP_NET_ADMIN))
1731                         return -EPERM;
1732                 err = copy_from_user(&rtmsg, arg,
1733                                      sizeof(struct in6_rtmsg));
1734                 if (err)
1735                         return -EFAULT;
1736
1737                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1738
1739                 rtnl_lock();
1740                 switch (cmd) {
1741                 case SIOCADDRT:
1742                         err = ip6_route_add(&cfg);
1743                         break;
1744                 case SIOCDELRT:
1745                         err = ip6_route_del(&cfg);
1746                         break;
1747                 default:
1748                         err = -EINVAL;
1749                 }
1750                 rtnl_unlock();
1751
1752                 return err;
1753         }
1754
1755         return -EINVAL;
1756 }
1757
1758 /*
1759  *      Drop the packet on the floor
1760  */
1761
1762 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1763                                int ipstats_mib_noroutes)
1764 {
1765         int type;
1766         switch (ipstats_mib_noroutes) {
1767         case IPSTATS_MIB_INNOROUTES:
1768                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1769                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1770                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1771                         break;
1772                 }
1773                 /* FALLTHROUGH */
1774         case IPSTATS_MIB_OUTNOROUTES:
1775                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1776                 break;
1777         }
1778         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1779         kfree_skb(skb);
1780         return 0;
1781 }
1782
/*
 *      Drop @skb through ip6_pkt_drop() with ICMPv6 code ICMPV6_NOROUTE,
 *      accounted against IPSTATS_MIB_INNOROUTES.  Returns 0.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1787
/*
 *      Output-side variant of ip6_pkt_discard(): points skb->dev at the
 *      device of the attached dst before dropping, and accounts the drop
 *      against IPSTATS_MIB_OUTNOROUTES.  Returns 0.
 */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
        skb->dev = skb->dst->dev;
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1793
1794 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1795
/*
 *      Drop @skb through ip6_pkt_drop() with ICMPv6 code
 *      ICMPV6_ADM_PROHIBITED, accounted against IPSTATS_MIB_INNOROUTES.
 *      Only built with CONFIG_IPV6_MULTIPLE_TABLES.  Returns 0.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
1800
/*
 *      Output-side variant of ip6_pkt_prohibit(): points skb->dev at the
 *      device of the attached dst before dropping, and accounts the drop
 *      against IPSTATS_MIB_OUTNOROUTES.  Returns 0.
 */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
        skb->dev = skb->dst->dev;
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
1806
1807 #endif
1808
1809 /*
1810  *      Allocate a dst for local (unicast / anycast) address.
1811  */
1812
1813 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1814                                     const struct in6_addr *addr,
1815                                     int anycast)
1816 {
1817         struct rt6_info *rt = ip6_dst_alloc();
1818
1819         if (rt == NULL)
1820                 return ERR_PTR(-ENOMEM);
1821
1822         dev_hold(init_net.loopback_dev);
1823         in6_dev_hold(idev);
1824
1825         rt->u.dst.flags = DST_HOST;
1826         rt->u.dst.input = ip6_input;
1827         rt->u.dst.output = ip6_output;
1828         rt->rt6i_dev = init_net.loopback_dev;
1829         rt->rt6i_idev = idev;
1830         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1831         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1832         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1833         rt->u.dst.obsolete = -1;
1834
1835         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1836         if (anycast)
1837                 rt->rt6i_flags |= RTF_ANYCAST;
1838         else
1839                 rt->rt6i_flags |= RTF_LOCAL;
1840         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1841         if (rt->rt6i_nexthop == NULL) {
1842                 dst_free(&rt->u.dst);
1843                 return ERR_PTR(-ENOMEM);
1844         }
1845
1846         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1847         rt->rt6i_dst.plen = 128;
1848         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1849
1850         atomic_set(&rt->u.dst.__refcnt, 1);
1851
1852         return rt;
1853 }
1854
1855 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1856 {
1857         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1858             rt != &ip6_null_entry) {
1859                 RT6_TRACE("deleted by ifdown %p\n", rt);
1860                 return -1;
1861         }
1862         return 0;
1863 }
1864
/*
 *      Walk all FIB tables with fib6_ifdown() for @dev.  A NULL @dev
 *      makes fib6_ifdown() match routes on every device.
 */
void rt6_ifdown(struct net_device *dev)
{
        fib6_clean_all(fib6_ifdown, 0, dev);
}
1869
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned mtu;                   /* the new MTU */
};
1875
1876 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1877 {
1878         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1879         struct inet6_dev *idev;
1880
1881         /* In IPv6 pmtu discovery is not optional,
1882            so that RTAX_MTU lock cannot disable it.
1883            We still use this lock to block changes
1884            caused by addrconf/ndisc.
1885         */
1886
1887         idev = __in6_dev_get(arg->dev);
1888         if (idev == NULL)
1889                 return 0;
1890
1891         /* For administrative MTU increase, there is no way to discover
1892            IPv6 PMTU increase, so PMTU increase should be updated here.
1893            Since RFC 1981 doesn't include administrative MTU increase
1894            update PMTU increase is a MUST. (i.e. jumbo frame)
1895          */
1896         /*
1897            If new MTU is less than route PMTU, this new MTU will be the
1898            lowest MTU in the path, update the route PMTU to reflect PMTU
1899            decreases; if new MTU is greater than route PMTU, and the
1900            old MTU is the lowest MTU in the path, update the route PMTU
1901            to reflect the increase. In this case if the other nodes' MTU
1902            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1903            PMTU discouvery.
1904          */
1905         if (rt->rt6i_dev == arg->dev &&
1906             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1907             (dst_mtu(&rt->u.dst) > arg->mtu ||
1908              (dst_mtu(&rt->u.dst) < arg->mtu &&
1909               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1910                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1911                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1912         }
1913         return 0;
1914 }
1915
1916 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1917 {
1918         struct rt6_mtu_change_arg arg = {
1919                 .dev = dev,
1920                 .mtu = mtu,
1921         };
1922
1923         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1924 }
1925
1926 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1927         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1928         [RTA_OIF]               = { .type = NLA_U32 },
1929         [RTA_IIF]               = { .type = NLA_U32 },
1930         [RTA_PRIORITY]          = { .type = NLA_U32 },
1931         [RTA_METRICS]           = { .type = NLA_NESTED },
1932 };
1933
1934 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1935                               struct fib6_config *cfg)
1936 {
1937         struct rtmsg *rtm;
1938         struct nlattr *tb[RTA_MAX+1];
1939         int err;
1940
1941         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1942         if (err < 0)
1943                 goto errout;
1944
1945         err = -EINVAL;
1946         rtm = nlmsg_data(nlh);
1947         memset(cfg, 0, sizeof(*cfg));
1948
1949         cfg->fc_table = rtm->rtm_table;
1950         cfg->fc_dst_len = rtm->rtm_dst_len;
1951         cfg->fc_src_len = rtm->rtm_src_len;
1952         cfg->fc_flags = RTF_UP;
1953         cfg->fc_protocol = rtm->rtm_protocol;
1954
1955         if (rtm->rtm_type == RTN_UNREACHABLE)
1956                 cfg->fc_flags |= RTF_REJECT;
1957
1958         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1959         cfg->fc_nlinfo.nlh = nlh;
1960
1961         if (tb[RTA_GATEWAY]) {
1962                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1963                 cfg->fc_flags |= RTF_GATEWAY;
1964         }
1965
1966         if (tb[RTA_DST]) {
1967                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1968
1969                 if (nla_len(tb[RTA_DST]) < plen)
1970                         goto errout;
1971
1972                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1973         }
1974
1975         if (tb[RTA_SRC]) {
1976                 int plen = (rtm->rtm_src_len + 7) >> 3;
1977
1978                 if (nla_len(tb[RTA_SRC]) < plen)
1979                         goto errout;
1980
1981                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1982         }
1983
1984         if (tb[RTA_OIF])
1985                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1986
1987         if (tb[RTA_PRIORITY])
1988                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1989
1990         if (tb[RTA_METRICS]) {
1991                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1992                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1993         }
1994
1995         if (tb[RTA_TABLE])
1996                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1997
1998         err = 0;
1999 errout:
2000         return err;
2001 }
2002
2003 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2004 {
2005         struct net *net = skb->sk->sk_net;
2006         struct fib6_config cfg;
2007         int err;
2008
2009         if (net != &init_net)
2010                 return -EINVAL;
2011
2012         err = rtm_to_fib6_config(skb, nlh, &cfg);
2013         if (err < 0)
2014                 return err;
2015
2016         return ip6_route_del(&cfg);
2017 }
2018
2019 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2020 {
2021         struct net *net = skb->sk->sk_net;
2022         struct fib6_config cfg;
2023         int err;
2024
2025         if (net != &init_net)
2026                 return -EINVAL;
2027
2028         err = rtm_to_fib6_config(skb, nlh, &cfg);
2029         if (err < 0)
2030                 return err;
2031
2032         return ip6_route_add(&cfg);
2033 }
2034
2035 static inline size_t rt6_nlmsg_size(void)
2036 {
2037         return NLMSG_ALIGN(sizeof(struct rtmsg))
2038                + nla_total_size(16) /* RTA_SRC */
2039                + nla_total_size(16) /* RTA_DST */
2040                + nla_total_size(16) /* RTA_GATEWAY */
2041                + nla_total_size(16) /* RTA_PREFSRC */
2042                + nla_total_size(4) /* RTA_TABLE */
2043                + nla_total_size(4) /* RTA_IIF */
2044                + nla_total_size(4) /* RTA_OIF */
2045                + nla_total_size(4) /* RTA_PRIORITY */
2046                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2047                + nla_total_size(sizeof(struct rta_cacheinfo));
2048 }
2049
/*
 *      Append one route message describing @rt to @skb.
 *
 *      @dst, @src: when non-NULL, reported as /128 RTA_DST / RTA_SRC in
 *                  place of the route's own prefixes (RTA_SRC only with
 *                  CONFIG_IPV6_SUBTREES).
 *      @iif:       when non-zero, reported as RTA_IIF; otherwise, if
 *                  @dst is given, a preferred source address is looked
 *                  up and reported as RTA_PREFSRC.
 *      @type, @pid, @seq, @flags: passed to nlmsg_put() for the header.
 *      @prefix:    when non-zero, only RTF_PREFIX_RT routes are emitted.
 *
 *      Returns 1 when @rt was skipped by the @prefix filter, the result
 *      of nlmsg_end() on success, or -EMSGSIZE when the message does not
 *      fit (a partially built message is cancelled first).
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, unsigned int flags)
{
        struct rtmsg *rtm;
        struct nlmsghdr *nlh;
        long expires;
        u32 table;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        rtm = nlmsg_data(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        if (rt->rt6i_table)
                table = rt->rt6i_table->tb6_id;
        else
                table = RT6_TABLE_UNSPEC;
        /* The table id is reported both in the header and as RTA_TABLE. */
        rtm->rtm_table = table;
        NLA_PUT_U32(skb, RTA_TABLE, table);
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* The stored protocol is overridden for dynamic, addrconf and default routes. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF)
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;

        if (rt->rt6i_flags&RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        if (dst) {
                NLA_PUT(skb, RTA_DST, 16, dst);
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
        if (src) {
                NLA_PUT(skb, RTA_SRC, 16, src);
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len)
                NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
        if (iif)
                NLA_PUT_U32(skb, RTA_IIF, iif);
        else if (dst) {
                struct in6_addr saddr_buf;
                if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
                        NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }

        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
                goto nla_put_failure;

        if (rt->u.dst.neighbour)
                NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);

        if (rt->u.dst.dev)
                NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

        NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

        /* Expiry is reported relative to now; 0 means the route has no expiry set. */
        expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
        if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
                               expires, rt->u.dst.error) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

        /* Also the target of the NLA_PUT*() macros when the skb has no room left. */
nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2143
2144 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2145 {
2146         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2147         int prefix;
2148
2149         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2150                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2151                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2152         } else
2153                 prefix = 0;
2154
2155         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2156                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2157                      prefix, NLM_F_MULTI);
2158 }
2159
/*
 *      rtnetlink RTM_GETROUTE handler for PF_INET6.
 *
 *      Builds a flow from the optional RTA_SRC, RTA_DST and RTA_OIF
 *      attributes, resolves it with ip6_route_output() and unicasts the
 *      resulting route back to the requester as an RTM_NEWROUTE message.
 *
 *      Returns -EINVAL for requests from outside init_net or for
 *      RTA_SRC / RTA_DST attributes shorter than an IPv6 address,
 *      -ENODEV when RTA_IIF names an unknown interface, -ENOBUFS when
 *      the reply skb cannot be allocated, or the error reported by
 *      nlmsg_parse(), rt6_fill_node() or rtnl_unicast().
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
        struct net *net = in_skb->sk->sk_net;
        struct nlattr *tb[RTA_MAX+1];
        struct rt6_info *rt;
        struct sk_buff *skb;
        struct rtmsg *rtm;
        struct flowi fl;
        int err, iif = 0;

        if (net != &init_net)
                return -EINVAL;

        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
        if (err < 0)
                goto errout;

        err = -EINVAL;
        memset(&fl, 0, sizeof(fl));

        /* Addresses are length-checked by hand before being copied. */
        if (tb[RTA_SRC]) {
                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
                        goto errout;

                ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
        }

        if (tb[RTA_DST]) {
                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
                        goto errout;

                ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
        }

        if (tb[RTA_IIF])
                iif = nla_get_u32(tb[RTA_IIF]);

        if (tb[RTA_OIF])
                fl.oif = nla_get_u32(tb[RTA_OIF]);

        /* The input interface is only checked for existence; dev is not used further. */
        if (iif) {
                struct net_device *dev;
                dev = __dev_get_by_index(&init_net, iif);
                if (!dev) {
                        err = -ENODEV;
                        goto errout;
                }
        }

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (skb == NULL) {
                err = -ENOBUFS;
                goto errout;
        }

        /* Reserve room for dummy headers, this skb can pass
           through good chunk of routing engine.
         */
        skb_reset_mac_header(skb);
        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

        /* The looked-up route is attached to the reply skb as its dst. */
        rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
        skb->dst = &rt->u.dst;

        err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
                            nlh->nlmsg_seq, 0, 0);
        if (err < 0) {
                kfree_skb(skb);
                goto errout;
        }

        err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
errout:
        return err;
}
2236
2237 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2238 {
2239         struct sk_buff *skb;
2240         u32 seq;
2241         int err;
2242
2243         err = -ENOBUFS;
2244         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2245
2246         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2247         if (skb == NULL)
2248                 goto errout;
2249
2250         err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2251                                 event, info->pid, seq, 0, 0);
2252         if (err < 0) {
2253                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2254                 WARN_ON(err == -EMSGSIZE);
2255                 kfree_skb(skb);
2256                 goto errout;
2257         }
2258         err = rtnl_notify(skb, &init_net, info->pid,
2259                                 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2260 errout:
2261         if (err < 0)
2262                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2263 }
2264
2265 /*
2266  *      /proc
2267  */
2268
2269 #ifdef CONFIG_PROC_FS
2270
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is
 * referenced by the seq_file based /proc code below; they look like
 * leftovers from an older buffer-based implementation -- confirm
 * against the rest of the tree before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2281
2282 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2283 {
2284         struct seq_file *m = p_arg;
2285
2286         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2287                    rt->rt6i_dst.plen);
2288
2289 #ifdef CONFIG_IPV6_SUBTREES
2290         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2291                    rt->rt6i_src.plen);
2292 #else
2293         seq_puts(m, "00000000000000000000000000000000 00 ");
2294 #endif
2295
2296         if (rt->rt6i_nexthop) {
2297                 seq_printf(m, NIP6_SEQFMT,
2298                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2299         } else {
2300                 seq_puts(m, "00000000000000000000000000000000");
2301         }
2302         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2303                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2304                    rt->u.dst.__use, rt->rt6i_flags,
2305                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2306         return 0;
2307 }
2308
/*
 *      seq_file show callback for "ipv6_route": prints every route by
 *      running rt6_info_route() over all FIB tables.  Always returns 0.
 */
static int ipv6_route_show(struct seq_file *m, void *v)
{
        fib6_clean_all(rt6_info_route, 0, m);
        return 0;
}
2314
/* open() for "ipv6_route": a single-shot seq_file backed by ipv6_route_show(). */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
        return single_open(file, ipv6_route_show, NULL);
}
2319
/* File operations of the "ipv6_route" proc entry created in ipv6_route_proc_init(). */
static const struct file_operations ipv6_route_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = ipv6_route_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
2327
/*
 *      seq_file show callback for "rt6_stats": prints one line of seven
 *      hexadecimal fields -- fib_nodes, fib_route_nodes, fib_rt_alloc,
 *      fib_rt_entries, fib_rt_cache, the ip6_dst_ops entry count and
 *      fib_discarded_routes.  Always returns 0.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
                      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
                      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
                      rt6_stats.fib_rt_cache,
                      atomic_read(&ip6_dst_ops.entries),
                      rt6_stats.fib_discarded_routes);

        return 0;
}
2339
/* open() for "rt6_stats": a single-shot seq_file backed by rt6_stats_seq_show(). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open(file, rt6_stats_seq_show, NULL);
}
2344
/* File operations of the "rt6_stats" proc entry created in ipv6_route_proc_init(). */
static const struct file_operations rt6_stats_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt6_stats_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
2352
2353 static int ipv6_route_proc_init(struct net *net)
2354 {
2355         int ret = -ENOMEM;
2356         if (!proc_net_fops_create(net, "ipv6_route",
2357                                   0, &ipv6_route_proc_fops))
2358                 goto out;
2359
2360         if (!proc_net_fops_create(net, "rt6_stats",
2361                                   S_IRUGO, &rt6_stats_seq_fops))
2362                 goto out_ipv6_route;
2363
2364         ret = 0;
2365 out:
2366         return ret;
2367 out_ipv6_route:
2368         proc_net_remove(net, "ipv6_route");
2369         goto out;
2370 }
2371
/* Remove the proc entries created by ipv6_route_proc_init() for @net. */
static void ipv6_route_proc_fini(struct net *net)
{
        proc_net_remove(net, "ipv6_route");
        proc_net_remove(net, "rt6_stats");
}
2377 #else
/* Without CONFIG_PROC_FS there is nothing to create; report success. */
static inline int ipv6_route_proc_init(struct net *net)
{
        return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ipv6_route_proc_fini(struct net *net)
{
}
2386 #endif  /* CONFIG_PROC_FS */
2387
2388 #ifdef CONFIG_SYSCTL
2389
2390 static
2391 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2392                               void __user *buffer, size_t *lenp, loff_t *ppos)
2393 {
2394         int delay = init_net.ipv6.sysctl.flush_delay;
2395         if (write) {
2396                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2397                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
2398                 return 0;
2399         } else
2400                 return -EINVAL;
2401 }
2402
2403 ctl_table ipv6_route_table_template[] = {
2404         {
2405                 .procname       =       "flush",
2406                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2407                 .maxlen         =       sizeof(int),
2408                 .mode           =       0200,
2409                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2410         },
2411         {
2412                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2413                 .procname       =       "gc_thresh",
2414                 .data           =       &ip6_dst_ops.gc_thresh,
2415                 .maxlen         =       sizeof(int),
2416                 .mode           =       0644,
2417                 .proc_handler   =       &proc_dointvec,
2418         },
2419         {
2420                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2421                 .procname       =       "max_size",
2422                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2423                 .maxlen         =       sizeof(int),
2424                 .mode           =       0644,
2425                 .proc_handler   =       &proc_dointvec,
2426         },
2427         {
2428                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2429                 .procname       =       "gc_min_interval",
2430                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2431                 .maxlen         =       sizeof(int),
2432                 .mode           =       0644,
2433                 .proc_handler   =       &proc_dointvec_jiffies,
2434                 .strategy       =       &sysctl_jiffies,
2435         },
2436         {
2437                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2438                 .procname       =       "gc_timeout",
2439                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2440                 .maxlen         =       sizeof(int),
2441                 .mode           =       0644,
2442                 .proc_handler   =       &proc_dointvec_jiffies,
2443                 .strategy       =       &sysctl_jiffies,
2444         },
2445         {
2446                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2447                 .procname       =       "gc_interval",
2448                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2449                 .maxlen         =       sizeof(int),
2450                 .mode           =       0644,
2451                 .proc_handler   =       &proc_dointvec_jiffies,
2452                 .strategy       =       &sysctl_jiffies,
2453         },
2454         {
2455                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2456                 .procname       =       "gc_elasticity",
2457                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2458                 .maxlen         =       sizeof(int),
2459                 .mode           =       0644,
2460                 .proc_handler   =       &proc_dointvec_jiffies,
2461                 .strategy       =       &sysctl_jiffies,
2462         },
2463         {
2464                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2465                 .procname       =       "mtu_expires",
2466                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2467                 .maxlen         =       sizeof(int),
2468                 .mode           =       0644,
2469                 .proc_handler   =       &proc_dointvec_jiffies,
2470                 .strategy       =       &sysctl_jiffies,
2471         },
2472         {
2473                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2474                 .procname       =       "min_adv_mss",
2475                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2476                 .maxlen         =       sizeof(int),
2477                 .mode           =       0644,
2478                 .proc_handler   =       &proc_dointvec_jiffies,
2479                 .strategy       =       &sysctl_jiffies,
2480         },
2481         {
2482                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2483                 .procname       =       "gc_min_interval_ms",
2484                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2485                 .maxlen         =       sizeof(int),
2486                 .mode           =       0644,
2487                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2488                 .strategy       =       &sysctl_ms_jiffies,
2489         },
2490         { .ctl_name = 0 }
2491 };
2492
2493 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2494 {
2495         struct ctl_table *table;
2496
2497         table = kmemdup(ipv6_route_table_template,
2498                         sizeof(ipv6_route_table_template),
2499                         GFP_KERNEL);
2500         return table;
2501 }
2502 #endif
2503
/*
 *      Initialise the IPv6 routing layer: create the dst slab cache
 *      (shared with ip6_dst_blackhole_ops), then bring up the FIB, the
 *      proc entries for init_net, xfrm6, the fib6 rules and finally the
 *      PF_INET6 rtnetlink route handlers.
 *
 *      Returns 0 on success.  On failure the steps already completed
 *      are undone in reverse order through the label ladder at the end
 *      and the error is returned (-ENOMEM if the cache cannot be
 *      created, -ENOBUFS if a handler cannot be registered).
 */
int __init ip6_route_init(void)
{
        int ret;

        ip6_dst_ops.kmem_cachep =
                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!ip6_dst_ops.kmem_cachep)
                return -ENOMEM;

        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;

        ret = fib6_init();
        if (ret)
                goto out_kmem_cache;

        ret = ipv6_route_proc_init(&init_net);
        if (ret)
                goto out_fib6_init;

        ret = xfrm6_init();
        if (ret)
                goto out_proc_init;

        ret = fib6_rules_init();
        if (ret)
                goto xfrm6_init;

        ret = -ENOBUFS;
        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
                goto fib6_rules_init;

        ret = 0;
out:
        return ret;

        /*
         * Unwind ladder: each label undoes the step it is named after
         * and falls through to the labels for the earlier steps.
         */
fib6_rules_init:
        fib6_rules_cleanup();
xfrm6_init:
        xfrm6_fini();
out_proc_init:
        ipv6_route_proc_fini(&init_net);
out_fib6_init:
        rt6_ifdown(NULL);
        fib6_gc_cleanup();
out_kmem_cache:
        kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
        goto out;
}
2555
/*
 *      Tear down what ip6_route_init() set up: the fib6 rules, the proc
 *      entries of init_net, xfrm6, all routes (rt6_ifdown(NULL)), the
 *      FIB garbage collector and finally the dst slab cache.
 */
void ip6_route_cleanup(void)
{
        fib6_rules_cleanup();
        ipv6_route_proc_fini(&init_net);
        xfrm6_fini();
        rt6_ifdown(NULL);
        fib6_gc_cleanup();
        kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}