[NETNS][IPV6]: Make multiple instance of sysctl tables.
[safe/jmp/linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; at lower values both expand to
 * nothing.
 */
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
66 #if RT6_DEBUG >= 3
/* RDBG takes a complete, parenthesised printk argument list: RDBG(("fmt", a)) */
67 #define RDBG(x) printk x
/* RT6_TRACE takes a format string and arguments; KERN_DEBUG is prepended. */
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
/*
 * Compile-time switch read by ip6_pol_route(): when non-zero, a selected
 * route that already has a nexthop is copied with rt6_alloc_clone(); when
 * zero, such a route is returned as it is.
 */
74 #define CLONE_OFFLINK_ROUTE 0
75
/*
 * Routing tunables and their initial values.  The entries written as
 * multiples of HZ are time values in jiffies.  ip6_rt_gc_interval is the
 * only one with external linkage.
 */
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/*
 * Lower bound applied by ipv6_advmss().
 * NOTE(review): the 20 and 40 are presumably the TCP and IPv6 header
 * sizes - confirm.
 */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
/*
 * Forward declarations for file-local functions that are referenced before
 * their definitions (by the dst_ops tables, the built-in route entries and
 * the lookup helpers below).
 */
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void             ip6_dst_destroy(struct dst_entry *);
88 static void             ip6_dst_ifdown(struct dst_entry *,
89                                        struct net_device *dev, int how);
90 static int               ip6_dst_gc(void);
91
92 static int              ip6_pkt_discard(struct sk_buff *skb);
93 static int              ip6_pkt_discard_out(struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
/* Route Information option support; used by rt6_route_rcv(). */
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
99                                            struct in6_addr *gwaddr, int ifindex,
100                                            unsigned pref);
101 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
102                                            struct in6_addr *gwaddr, int ifindex);
103 #endif
104
/*
 * dst_ops table for ordinary IPv6 routes.  It wires the callbacks declared
 * above into the generic dst layer; ip6_dst_alloc() allocates entries
 * against this table, and the built-in entries below point their .ops at it.
 * entry_size is the size of a full struct rt6_info.
 */
105 static struct dst_ops ip6_dst_ops = {
106         .family                 =       AF_INET6,
107         .protocol               =       __constant_htons(ETH_P_IPV6),
108         .gc                     =       ip6_dst_gc,
109         .gc_thresh              =       1024,
110         .check                  =       ip6_dst_check,
111         .destroy                =       ip6_dst_destroy,
112         .ifdown                 =       ip6_dst_ifdown,
113         .negative_advice        =       ip6_negative_advice,
114         .link_failure           =       ip6_link_failure,
115         .update_pmtu            =       ip6_rt_update_pmtu,
116         .local_out              =       ip6_local_out,
117         .entry_size             =       sizeof(struct rt6_info),
118 };
119
120 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
121 {
122 }
123
/*
 * dst_ops table used by ip6_dst_blackhole() when it allocates its
 * replacement entry.  It shares destroy/check with ip6_dst_ops, uses the
 * no-op ip6_rt_blackhole_update_pmtu(), and leaves every other callback
 * unset.
 */
124 static struct dst_ops ip6_dst_blackhole_ops = {
125         .family                 =       AF_INET6,
126         .protocol               =       __constant_htons(ETH_P_IPV6),
127         .destroy                =       ip6_dst_destroy,
128         .check                  =       ip6_dst_check,
129         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
130         .entry_size             =       sizeof(struct rt6_info),
131 };
132
/*
 * Statically allocated "no route" entry.  The lookup helpers in this file
 * (rt6_device_match(), rt6_select()) return its address when nothing
 * matches, and callers compare against &ip6_null_entry to detect that.
 * Its dst carries -ENETUNREACH, the packet-discard input/output handlers,
 * and a .path pointer that refers back to the entry itself.
 */
133 struct rt6_info ip6_null_entry = {
134         .u = {
135                 .dst = {
136                         .__refcnt       = ATOMIC_INIT(1),
137                         .__use          = 1,
138                         .obsolete       = -1,
139                         .error          = -ENETUNREACH,
140                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
141                         .input          = ip6_pkt_discard,
142                         .output         = ip6_pkt_discard_out,
143                         .ops            = &ip6_dst_ops,
144                         .path           = (struct dst_entry*)&ip6_null_entry,
145                 }
146         },
147         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
148         .rt6i_metric    = ~(u32) 0,
149         .rt6i_ref       = ATOMIC_INIT(1),
150 };
151
/*
 * Two further built-in entries, only compiled with
 * CONFIG_IPV6_MULTIPLE_TABLES.  They have the same layout as ip6_null_entry
 * and differ in the error code and the input/output handlers.
 */
152 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
153
154 static int ip6_pkt_prohibit(struct sk_buff *skb);
155 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
156
/* "Prohibited" entry: error -EACCES, handled by the ip6_pkt_prohibit*() pair. */
157 struct rt6_info ip6_prohibit_entry = {
158         .u = {
159                 .dst = {
160                         .__refcnt       = ATOMIC_INIT(1),
161                         .__use          = 1,
162                         .obsolete       = -1,
163                         .error          = -EACCES,
164                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
165                         .input          = ip6_pkt_prohibit,
166                         .output         = ip6_pkt_prohibit_out,
167                         .ops            = &ip6_dst_ops,
168                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
169                 }
170         },
171         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
172         .rt6i_metric    = ~(u32) 0,
173         .rt6i_ref       = ATOMIC_INIT(1),
174 };
175
/* "Blackhole" entry: error -EINVAL, both directions go to dst_discard(). */
176 struct rt6_info ip6_blk_hole_entry = {
177         .u = {
178                 .dst = {
179                         .__refcnt       = ATOMIC_INIT(1),
180                         .__use          = 1,
181                         .obsolete       = -1,
182                         .error          = -EINVAL,
183                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
184                         .input          = dst_discard,
185                         .output         = dst_discard,
186                         .ops            = &ip6_dst_ops,
187                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
188                 }
189         },
190         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
191         .rt6i_metric    = ~(u32) 0,
192         .rt6i_ref       = ATOMIC_INIT(1),
193 };
194
195 #endif
196
197 /* allocate dst with ip6_dst_ops */
198 static __inline__ struct rt6_info *ip6_dst_alloc(void)
199 {
200         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
201 }
202
203 static void ip6_dst_destroy(struct dst_entry *dst)
204 {
205         struct rt6_info *rt = (struct rt6_info *)dst;
206         struct inet6_dev *idev = rt->rt6i_idev;
207
208         if (idev != NULL) {
209                 rt->rt6i_idev = NULL;
210                 in6_dev_put(idev);
211         }
212 }
213
214 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
215                            int how)
216 {
217         struct rt6_info *rt = (struct rt6_info *)dst;
218         struct inet6_dev *idev = rt->rt6i_idev;
219         struct net_device *loopback_dev =
220                 dev->nd_net->loopback_dev;
221
222         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
223                 struct inet6_dev *loopback_idev =
224                         in6_dev_get(loopback_dev);
225                 if (loopback_idev != NULL) {
226                         rt->rt6i_idev = loopback_idev;
227                         in6_dev_put(idev);
228                 }
229         }
230 }
231
232 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
233 {
234         return (rt->rt6i_flags & RTF_EXPIRES &&
235                 time_after(jiffies, rt->rt6i_expires));
236 }
237
238 static inline int rt6_need_strict(struct in6_addr *daddr)
239 {
240         return (ipv6_addr_type(daddr) &
241                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
242 }
243
244 /*
245  *      Route lookup. Any table->tb6_lock is implied.
246  */
247
/*
 * Pick, from the rt6_next chain starting at @rt, the route that fits the
 * requested interface index @oif.
 *
 *  - oif == 0: no filtering, @rt is returned unchanged.
 *  - A route whose device has ifindex == oif is returned immediately.
 *  - Routes on a loopback device are remembered in "local" as a fallback,
 *    subject to the checks in the loop, and returned if no exact match
 *    is found.
 *  - With no match at all: &ip6_null_entry when @strict is set,
 *    otherwise @rt.
 */
248 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
249                                                     int oif,
250                                                     int strict)
251 {
252         struct rt6_info *local = NULL;
253         struct rt6_info *sprt;
254
255         if (oif) {
256                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
257                         struct net_device *dev = sprt->rt6i_dev;
258                         if (dev->ifindex == oif)
259                                 return sprt;
260                         if (dev->flags & IFF_LOOPBACK) {
                                /* loopback route whose idev is not the requested interface */
261                                 if (sprt->rt6i_idev == NULL ||
262                                     sprt->rt6i_idev->dev->ifindex != oif) {
                                        /* strict lookups never fall back to it */
263                                         if (strict && oif)
264                                                 continue;
                                        /* keep an earlier candidate whose idev matches oif */
265                                         if (local && (!oif ||
266                                                       local->rt6i_idev->dev->ifindex == oif))
267                                                 continue;
268                                 }
269                                 local = sprt;
270                         }
271                 }
272
273                 if (local)
274                         return local;
275
276                 if (strict)
277                         return &ip6_null_entry;
278         }
279         return rt;
280 }
281
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Sends a neighbour solicitation to the nexthop of @rt when that neighbour
 * is not in a NUD_VALID state.  Probes are rate-limited: one is only sent
 * when more than the device's rtr_probe_interval has passed since
 * neigh->updated, and neigh->updated is refreshed before sending.
 * A NULL @rt or a route without a nexthop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	neigh = rt ? rt->rt6i_nexthop : NULL;
	if (!neigh)
		return;
	/* Unlocked fast path: nothing to do for a valid neighbour. */
	if (neigh->nud_state & NUD_VALID)
		return;

	/* Re-check under the lock and apply the rate limit. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* Solicit the neighbour on its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
317
318 /*
319  * Default Router Selection (RFC 2461 6.3.6)
320  */
321 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
322 {
323         struct net_device *dev = rt->rt6i_dev;
324         if (!oif || dev->ifindex == oif)
325                 return 2;
326         if ((dev->flags & IFF_LOOPBACK) &&
327             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
328                 return 1;
329         return 0;
330 }
331
332 static inline int rt6_check_neigh(struct rt6_info *rt)
333 {
334         struct neighbour *neigh = rt->rt6i_nexthop;
335         int m;
336         if (rt->rt6i_flags & RTF_NONEXTHOP ||
337             !(rt->rt6i_flags & RTF_GATEWAY))
338                 m = 1;
339         else if (neigh) {
340                 read_lock_bh(&neigh->lock);
341                 if (neigh->nud_state & NUD_VALID)
342                         m = 2;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344                 else if (neigh->nud_state & NUD_FAILED)
345                         m = 0;
346 #endif
347                 else
348                         m = 1;
349                 read_unlock_bh(&neigh->lock);
350         } else
351                 m = 0;
352         return m;
353 }
354
355 static int rt6_score_route(struct rt6_info *rt, int oif,
356                            int strict)
357 {
358         int m, n;
359
360         m = rt6_check_dev(rt, oif);
361         if (!m && (strict & RT6_LOOKUP_F_IFACE))
362                 return -1;
363 #ifdef CONFIG_IPV6_ROUTER_PREF
364         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
365 #endif
366         n = rt6_check_neigh(rt);
367         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
368                 return -1;
369         return m;
370 }
371
372 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
373                                    int *mpri, struct rt6_info *match)
374 {
375         int m;
376
377         if (rt6_check_expired(rt))
378                 goto out;
379
380         m = rt6_score_route(rt, oif, strict);
381         if (m < 0)
382                 goto out;
383
384         if (m > *mpri) {
385                 if (strict & RT6_LOOKUP_F_REACHABLE)
386                         rt6_probe(match);
387                 *mpri = m;
388                 match = rt;
389         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
390                 rt6_probe(rt);
391         }
392
393 out:
394         return match;
395 }
396
397 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
398                                      struct rt6_info *rr_head,
399                                      u32 metric, int oif, int strict)
400 {
401         struct rt6_info *rt, *match;
402         int mpri = -1;
403
404         match = NULL;
405         for (rt = rr_head; rt && rt->rt6i_metric == metric;
406              rt = rt->u.dst.rt6_next)
407                 match = find_match(rt, oif, strict, &mpri, match);
408         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
409              rt = rt->u.dst.rt6_next)
410                 match = find_match(rt, oif, strict, &mpri, match);
411
412         return match;
413 }
414
415 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
416 {
417         struct rt6_info *match, *rt0;
418
419         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
420                   __FUNCTION__, fn->leaf, oif);
421
422         rt0 = fn->rr_ptr;
423         if (!rt0)
424                 fn->rr_ptr = rt0 = fn->leaf;
425
426         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
427
428         if (!match &&
429             (strict & RT6_LOOKUP_F_REACHABLE)) {
430                 struct rt6_info *next = rt0->u.dst.rt6_next;
431
432                 /* no entries matched; do round-robin */
433                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
434                         next = fn->leaf;
435
436                 if (next != rt0)
437                         fn->rr_ptr = next;
438         }
439
440         RT6_TRACE("%s() => %p\n",
441                   __FUNCTION__, match);
442
443         return (match ? match : &ip6_null_entry);
444 }
445
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from gateway @gwaddr.
 *
 * @opt points at the option and @len is its length in bytes.  The option
 * is validated, then the matching route-info route is deleted (lifetime 0),
 * created, or has its preference refreshed.  A surviving route gets its
 * expiry set from the lifetime (0xffffffff means "never expires") and the
 * reference obtained here is released.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* 0xffffffff means infinity; clamp anything else that would overflow. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length != 3) {
		/* Shorter option: build the prefix in prefix_buf from prefix_len bits. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		/* Full-size option: use the prefix in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt != NULL && lifetime == 0) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt != NULL) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime != 0) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (rt == NULL)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
523
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a selection came up empty.
 *
 * The macro expands in place and depends on the caller's scope: it reads
 * "rt", reads and updates "fn", and jumps to the caller's "out" and
 * "restart" labels.  It does nothing unless rt == &ip6_null_entry.
 * Otherwise it loops:
 *   - if fn is flagged RTN_TL_ROOT, jump to "out";
 *   - step to the parent node, or, when the parent has a subtree other
 *     than fn, to the result of fib6_lookup() in that subtree keyed by
 *     saddr;
 *   - once the new fn is flagged RTN_RTINFO, jump to "restart".
 */
524 #define BACKTRACK(saddr) \
525 do { \
526         if (rt == &ip6_null_entry) { \
527                 struct fib6_node *pn; \
528                 while (1) { \
529                         if (fn->fn_flags & RTN_TL_ROOT) \
530                                 goto out; \
531                         pn = fn->parent; \
532                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
533                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
534                         else \
535                                 fn = pn; \
536                         if (fn->fn_flags & RTN_RTINFO) \
537                                 goto restart; \
538                 } \
539         } \
540 } while(0)
541
/*
 * Lookup callback passed to fib6_rule_lookup() by rt6_lookup().
 *
 * Under the table's read lock it finds the fib6 node for the flow's
 * destination/source, filters the node's leaf chain with
 * rt6_device_match() against fl->oif, and backtracks with BACKTRACK() while
 * the result is ip6_null_entry.  The chosen route is passed to dst_use()
 * before the lock is dropped, so the result is never NULL.
 */
542 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
543                                              struct flowi *fl, int flags)
544 {
545         struct fib6_node *fn;
546         struct rt6_info *rt;
547
548         read_lock_bh(&table->tb6_lock);
549         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
/* BACKTRACK() jumps back here after moving fn to another node. */
550 restart:
551         rt = fn->leaf;
552         rt = rt6_device_match(rt, fl->oif, flags);
553         BACKTRACK(&fl->fl6_src);
/* Also reached from BACKTRACK() when the top-level root is hit. */
554 out:
555         dst_use(&rt->u.dst, jiffies);
556         read_unlock_bh(&table->tb6_lock);
557         return rt;
558
559 }
560
561 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562                             int oif, int strict)
563 {
564         struct flowi fl = {
565                 .oif = oif,
566                 .nl_u = {
567                         .ip6_u = {
568                                 .daddr = *daddr,
569                         },
570                 },
571         };
572         struct dst_entry *dst;
573         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574
575         if (saddr) {
576                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577                 flags |= RT6_LOOKUP_F_HAS_SADDR;
578         }
579
580         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581         if (dst->error == 0)
582                 return (struct rt6_info *) dst;
583
584         dst_release(dst);
585
586         return NULL;
587 }
588
589 EXPORT_SYMBOL(rt6_lookup);
590
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592    It takes new route entry, the addition fails by any reason the
593    route is freed. In any case, if caller does not hold it, it may
594    be destroyed.
595  */
596
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599         int err;
600         struct fib6_table *table;
601
602         table = rt->rt6i_table;
603         write_lock_bh(&table->tb6_lock);
604         err = fib6_add(&table->tb6_root, rt, info);
605         write_unlock_bh(&table->tb6_lock);
606
607         return err;
608 }
609
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612         struct nl_info info = {};
613         return __ip6_ins_rt(rt, &info);
614 }
615
616 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
617                                       struct in6_addr *saddr)
618 {
619         struct rt6_info *rt;
620
621         /*
622          *      Clone the route.
623          */
624
625         rt = ip6_rt_copy(ort);
626
627         if (rt) {
628                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
629                         if (rt->rt6i_dst.plen != 128 &&
630                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
631                                 rt->rt6i_flags |= RTF_ANYCAST;
632                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
633                 }
634
635                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
636                 rt->rt6i_dst.plen = 128;
637                 rt->rt6i_flags |= RTF_CACHE;
638                 rt->u.dst.flags |= DST_HOST;
639
640 #ifdef CONFIG_IPV6_SUBTREES
641                 if (rt->rt6i_src.plen && saddr) {
642                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
643                         rt->rt6i_src.plen = 128;
644                 }
645 #endif
646
647                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
648
649         }
650
651         return rt;
652 }
653
654 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
655 {
656         struct rt6_info *rt = ip6_rt_copy(ort);
657         if (rt) {
658                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
659                 rt->rt6i_dst.plen = 128;
660                 rt->rt6i_flags |= RTF_CACHE;
661                 rt->u.dst.flags |= DST_HOST;
662                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
663         }
664         return rt;
665 }
666
/*
 * Core policy-route lookup shared by ip6_pol_route_input() and
 * ip6_pol_route_output(); @oif is the interface index to match.
 *
 * Under the table read lock the flow's node is looked up, a route is
 * chosen with rt6_select(), and BACKTRACK() climbs the tree while the
 * choice is ip6_null_entry.  The first pass asks for reachable routers
 * only (unless ipv6_devconf.forwarding is set); whenever that pass ends at
 * "out" the lookup is redone once without the reachability requirement.
 *
 * A chosen route that is neither ip6_null_entry nor RTF_CACHE is handled
 * outside the lock:
 *   - no nexthop and not RTF_NONEXTHOP: copied with rt6_alloc_cow() and the
 *     copy is inserted with ip6_ins_rt();
 *   - otherwise: cloned with rt6_alloc_clone() if CLONE_OFFLINK_ROUTE is
 *     non-zero, else returned as it is.
 * If the copy could not be allocated or inserted the whole lookup is
 * retried; after three attempts the function returns what it holds.
 *
 * Every return path holds a reference taken with dst_hold() and updates
 * lastuse/__use of the returned route.
 */
667 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
668                                             struct flowi *fl, int flags)
669 {
670         struct fib6_node *fn;
671         struct rt6_info *rt, *nrt;
672         int strict = 0;
673         int attempts = 3;
674         int err;
675         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
676
677         strict |= flags & RT6_LOOKUP_F_IFACE;
678
679 relookup:
680         read_lock_bh(&table->tb6_lock);
681
/* Second pass entry point: lock still held, reachable already cleared. */
682 restart_2:
683         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
684
/* BACKTRACK() jumps back here after moving fn to another node. */
685 restart:
686         rt = rt6_select(fn, oif, strict | reachable);
687         BACKTRACK(&fl->fl6_src);
688         if (rt == &ip6_null_entry ||
689             rt->rt6i_flags & RTF_CACHE)
690                 goto out;
691
        /* Pin the route so it survives once the table lock is dropped. */
692         dst_hold(&rt->u.dst);
693         read_unlock_bh(&table->tb6_lock);
694
695         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
696                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
697         else {
698 #if CLONE_OFFLINK_ROUTE
699                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
700 #else
701                 goto out2;
702 #endif
703         }
704
        /* Swap the reference from the original route to the copy (or the null entry). */
705         dst_release(&rt->u.dst);
706         rt = nrt ? : &ip6_null_entry;
707
708         dst_hold(&rt->u.dst);
709         if (nrt) {
710                 err = ip6_ins_rt(nrt);
711                 if (!err)
712                         goto out2;
713         }
714
715         if (--attempts <= 0)
716                 goto out2;
717
718         /*
719          * Race condition! In the gap, when table->tb6_lock was
720          * released someone could insert this route.  Relookup.
721          */
722         dst_release(&rt->u.dst);
723         goto relookup;
724
/* Reached with the read lock held, also from BACKTRACK() at the tree root. */
725 out:
726         if (reachable) {
727                 reachable = 0;
728                 goto restart_2;
729         }
730         dst_hold(&rt->u.dst);
731         read_unlock_bh(&table->tb6_lock);
/* Reached with the lock released and a reference on rt already held. */
732 out2:
733         rt->u.dst.lastuse = jiffies;
734         rt->u.dst.__use++;
735
736         return rt;
737 }
738
739 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
740                                             struct flowi *fl, int flags)
741 {
742         return ip6_pol_route(table, fl->iif, fl, flags);
743 }
744
745 void ip6_route_input(struct sk_buff *skb)
746 {
747         struct ipv6hdr *iph = ipv6_hdr(skb);
748         int flags = RT6_LOOKUP_F_HAS_SADDR;
749         struct flowi fl = {
750                 .iif = skb->dev->ifindex,
751                 .nl_u = {
752                         .ip6_u = {
753                                 .daddr = iph->daddr,
754                                 .saddr = iph->saddr,
755                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
756                         },
757                 },
758                 .mark = skb->mark,
759                 .proto = iph->nexthdr,
760         };
761
762         if (rt6_need_strict(&iph->daddr))
763                 flags |= RT6_LOOKUP_F_IFACE;
764
765         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
766 }
767
768 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
769                                              struct flowi *fl, int flags)
770 {
771         return ip6_pol_route(table, fl->oif, fl, flags);
772 }
773
774 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
775 {
776         int flags = 0;
777
778         if (rt6_need_strict(&fl->fl6_dst))
779                 flags |= RT6_LOOKUP_F_IFACE;
780
781         if (!ipv6_addr_any(&fl->fl6_src))
782                 flags |= RT6_LOOKUP_F_HAS_SADDR;
783
784         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
785 }
786
787 EXPORT_SYMBOL(ip6_route_output);
788
789 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
790 {
791         struct rt6_info *ort = (struct rt6_info *) *dstp;
792         struct rt6_info *rt = (struct rt6_info *)
793                 dst_alloc(&ip6_dst_blackhole_ops);
794         struct dst_entry *new = NULL;
795
796         if (rt) {
797                 new = &rt->u.dst;
798
799                 atomic_set(&new->__refcnt, 1);
800                 new->__use = 1;
801                 new->input = dst_discard;
802                 new->output = dst_discard;
803
804                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
805                 new->dev = ort->u.dst.dev;
806                 if (new->dev)
807                         dev_hold(new->dev);
808                 rt->rt6i_idev = ort->rt6i_idev;
809                 if (rt->rt6i_idev)
810                         in6_dev_hold(rt->rt6i_idev);
811                 rt->rt6i_expires = 0;
812
813                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
814                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
815                 rt->rt6i_metric = 0;
816
817                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
818 #ifdef CONFIG_IPV6_SUBTREES
819                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
820 #endif
821
822                 dst_free(new);
823         }
824
825         dst_release(*dstp);
826         *dstp = new;
827         return (new ? 0 : -ENOMEM);
828 }
829 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
830
831 /*
832  *      Destination cache support functions
833  */
834
835 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
836 {
837         struct rt6_info *rt;
838
839         rt = (struct rt6_info *) dst;
840
841         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
842                 return dst;
843
844         return NULL;
845 }
846
847 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
848 {
849         struct rt6_info *rt = (struct rt6_info *) dst;
850
851         if (rt) {
852                 if (rt->rt6i_flags & RTF_CACHE)
853                         ip6_del_rt(rt);
854                 else
855                         dst_release(dst);
856         }
857         return NULL;
858 }
859
860 static void ip6_link_failure(struct sk_buff *skb)
861 {
862         struct rt6_info *rt;
863
864         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
865
866         rt = (struct rt6_info *) skb->dst;
867         if (rt) {
868                 if (rt->rt6i_flags&RTF_CACHE) {
869                         dst_set_expires(&rt->u.dst, 0);
870                         rt->rt6i_flags |= RTF_EXPIRES;
871                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
872                         rt->rt6i_node->fn_sernum = -1;
873         }
874 }
875
876 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
877 {
878         struct rt6_info *rt6 = (struct rt6_info*)dst;
879
880         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
881                 rt6->rt6i_flags |= RTF_MODIFIED;
882                 if (mtu < IPV6_MIN_MTU) {
883                         mtu = IPV6_MIN_MTU;
884                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
885                 }
886                 dst->metrics[RTAX_MTU-1] = mtu;
887                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
888         }
889 }
890
/* Forward declaration; used by ndisc_dst_alloc() to fill the RTAX_MTU metric. */
891 static int ipv6_get_mtu(struct net_device *dev);
892
893 static inline unsigned int ipv6_advmss(unsigned int mtu)
894 {
895         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
896
897         if (mtu < ip6_rt_min_advmss)
898                 mtu = ip6_rt_min_advmss;
899
900         /*
901          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
902          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
903          * IPV6_MAXPLEN is also valid and means: "any MSS,
904          * rely only on pmtu discovery"
905          */
906         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
907                 mtu = IPV6_MAXPLEN;
908         return mtu;
909 }
910
911 static struct dst_entry *ndisc_dst_gc_list;
912 static DEFINE_SPINLOCK(ndisc_lock);
913
914 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
915                                   struct neighbour *neigh,
916                                   struct in6_addr *addr,
917                                   int (*output)(struct sk_buff *))
918 {
919         struct rt6_info *rt;
920         struct inet6_dev *idev = in6_dev_get(dev);
921
922         if (unlikely(idev == NULL))
923                 return NULL;
924
925         rt = ip6_dst_alloc();
926         if (unlikely(rt == NULL)) {
927                 in6_dev_put(idev);
928                 goto out;
929         }
930
931         dev_hold(dev);
932         if (neigh)
933                 neigh_hold(neigh);
934         else
935                 neigh = ndisc_get_neigh(dev, addr);
936
937         rt->rt6i_dev      = dev;
938         rt->rt6i_idev     = idev;
939         rt->rt6i_nexthop  = neigh;
940         atomic_set(&rt->u.dst.__refcnt, 1);
941         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
942         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
943         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
944         rt->u.dst.output  = output;
945
946 #if 0   /* there's no chance to use these for ndisc */
947         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
948                                 ? DST_HOST
949                                 : 0;
950         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
951         rt->rt6i_dst.plen = 128;
952 #endif
953
954         spin_lock_bh(&ndisc_lock);
955         rt->u.dst.next = ndisc_dst_gc_list;
956         ndisc_dst_gc_list = &rt->u.dst;
957         spin_unlock_bh(&ndisc_lock);
958
959         fib6_force_start_gc();
960
961 out:
962         return &rt->u.dst;
963 }
964
965 int ndisc_dst_gc(int *more)
966 {
967         struct dst_entry *dst, *next, **pprev;
968         int freed;
969
970         next = NULL;
971         freed = 0;
972
973         spin_lock_bh(&ndisc_lock);
974         pprev = &ndisc_dst_gc_list;
975
976         while ((dst = *pprev) != NULL) {
977                 if (!atomic_read(&dst->__refcnt)) {
978                         *pprev = dst->next;
979                         dst_free(dst);
980                         freed++;
981                 } else {
982                         pprev = &dst->next;
983                         (*more)++;
984                 }
985         }
986
987         spin_unlock_bh(&ndisc_lock);
988
989         return freed;
990 }
991
992 static int ip6_dst_gc(void)
993 {
994         static unsigned expire = 30*HZ;
995         static unsigned long last_gc;
996         unsigned long now = jiffies;
997
998         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
999             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1000                 goto out;
1001
1002         expire++;
1003         fib6_run_gc(expire);
1004         last_gc = now;
1005         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1006                 expire = ip6_rt_gc_timeout>>1;
1007
1008 out:
1009         expire -= expire>>ip6_rt_gc_elasticity;
1010         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1011 }
1012
1013 /* Clean host part of a prefix. Not necessary in radix tree,
1014    but results in cleaner routing tables.
1015
1016    Remove it only when all the things will work!
1017  */
1018
1019 static int ipv6_get_mtu(struct net_device *dev)
1020 {
1021         int mtu = IPV6_MIN_MTU;
1022         struct inet6_dev *idev;
1023
1024         idev = in6_dev_get(dev);
1025         if (idev) {
1026                 mtu = idev->cnf.mtu6;
1027                 in6_dev_put(idev);
1028         }
1029         return mtu;
1030 }
1031
1032 int ipv6_get_hoplimit(struct net_device *dev)
1033 {
1034         int hoplimit = ipv6_devconf.hop_limit;
1035         struct inet6_dev *idev;
1036
1037         idev = in6_dev_get(dev);
1038         if (idev) {
1039                 hoplimit = idev->cnf.hop_limit;
1040                 in6_dev_put(idev);
1041         }
1042         return hoplimit;
1043 }
1044
1045 /*
1046  *
1047  */
1048
1049 int ip6_route_add(struct fib6_config *cfg)
1050 {
1051         int err;
1052         struct rt6_info *rt = NULL;
1053         struct net_device *dev = NULL;
1054         struct inet6_dev *idev = NULL;
1055         struct fib6_table *table;
1056         int addr_type;
1057
1058         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1059                 return -EINVAL;
1060 #ifndef CONFIG_IPV6_SUBTREES
1061         if (cfg->fc_src_len)
1062                 return -EINVAL;
1063 #endif
1064         if (cfg->fc_ifindex) {
1065                 err = -ENODEV;
1066                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1067                 if (!dev)
1068                         goto out;
1069                 idev = in6_dev_get(dev);
1070                 if (!idev)
1071                         goto out;
1072         }
1073
1074         if (cfg->fc_metric == 0)
1075                 cfg->fc_metric = IP6_RT_PRIO_USER;
1076
1077         table = fib6_new_table(cfg->fc_table);
1078         if (table == NULL) {
1079                 err = -ENOBUFS;
1080                 goto out;
1081         }
1082
1083         rt = ip6_dst_alloc();
1084
1085         if (rt == NULL) {
1086                 err = -ENOMEM;
1087                 goto out;
1088         }
1089
1090         rt->u.dst.obsolete = -1;
1091         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1092
1093         if (cfg->fc_protocol == RTPROT_UNSPEC)
1094                 cfg->fc_protocol = RTPROT_BOOT;
1095         rt->rt6i_protocol = cfg->fc_protocol;
1096
1097         addr_type = ipv6_addr_type(&cfg->fc_dst);
1098
1099         if (addr_type & IPV6_ADDR_MULTICAST)
1100                 rt->u.dst.input = ip6_mc_input;
1101         else
1102                 rt->u.dst.input = ip6_forward;
1103
1104         rt->u.dst.output = ip6_output;
1105
1106         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1107         rt->rt6i_dst.plen = cfg->fc_dst_len;
1108         if (rt->rt6i_dst.plen == 128)
1109                rt->u.dst.flags = DST_HOST;
1110
1111 #ifdef CONFIG_IPV6_SUBTREES
1112         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1113         rt->rt6i_src.plen = cfg->fc_src_len;
1114 #endif
1115
1116         rt->rt6i_metric = cfg->fc_metric;
1117
1118         /* We cannot add true routes via loopback here,
1119            they would result in kernel looping; promote them to reject routes
1120          */
1121         if ((cfg->fc_flags & RTF_REJECT) ||
1122             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1123                 /* hold loopback dev/idev if we haven't done so. */
1124                 if (dev != init_net.loopback_dev) {
1125                         if (dev) {
1126                                 dev_put(dev);
1127                                 in6_dev_put(idev);
1128                         }
1129                         dev = init_net.loopback_dev;
1130                         dev_hold(dev);
1131                         idev = in6_dev_get(dev);
1132                         if (!idev) {
1133                                 err = -ENODEV;
1134                                 goto out;
1135                         }
1136                 }
1137                 rt->u.dst.output = ip6_pkt_discard_out;
1138                 rt->u.dst.input = ip6_pkt_discard;
1139                 rt->u.dst.error = -ENETUNREACH;
1140                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1141                 goto install_route;
1142         }
1143
1144         if (cfg->fc_flags & RTF_GATEWAY) {
1145                 struct in6_addr *gw_addr;
1146                 int gwa_type;
1147
1148                 gw_addr = &cfg->fc_gateway;
1149                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1150                 gwa_type = ipv6_addr_type(gw_addr);
1151
1152                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1153                         struct rt6_info *grt;
1154
1155                         /* IPv6 strictly inhibits using not link-local
1156                            addresses as nexthop address.
1157                            Otherwise, router will not able to send redirects.
1158                            It is very good, but in some (rare!) circumstances
1159                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1160                            some exceptions. --ANK
1161                          */
1162                         err = -EINVAL;
1163                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1164                                 goto out;
1165
1166                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1167
1168                         err = -EHOSTUNREACH;
1169                         if (grt == NULL)
1170                                 goto out;
1171                         if (dev) {
1172                                 if (dev != grt->rt6i_dev) {
1173                                         dst_release(&grt->u.dst);
1174                                         goto out;
1175                                 }
1176                         } else {
1177                                 dev = grt->rt6i_dev;
1178                                 idev = grt->rt6i_idev;
1179                                 dev_hold(dev);
1180                                 in6_dev_hold(grt->rt6i_idev);
1181                         }
1182                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1183                                 err = 0;
1184                         dst_release(&grt->u.dst);
1185
1186                         if (err)
1187                                 goto out;
1188                 }
1189                 err = -EINVAL;
1190                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1191                         goto out;
1192         }
1193
1194         err = -ENODEV;
1195         if (dev == NULL)
1196                 goto out;
1197
1198         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1199                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1200                 if (IS_ERR(rt->rt6i_nexthop)) {
1201                         err = PTR_ERR(rt->rt6i_nexthop);
1202                         rt->rt6i_nexthop = NULL;
1203                         goto out;
1204                 }
1205         }
1206
1207         rt->rt6i_flags = cfg->fc_flags;
1208
1209 install_route:
1210         if (cfg->fc_mx) {
1211                 struct nlattr *nla;
1212                 int remaining;
1213
1214                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1215                         int type = nla_type(nla);
1216
1217                         if (type) {
1218                                 if (type > RTAX_MAX) {
1219                                         err = -EINVAL;
1220                                         goto out;
1221                                 }
1222
1223                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1224                         }
1225                 }
1226         }
1227
1228         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1229                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1230         if (!rt->u.dst.metrics[RTAX_MTU-1])
1231                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1232         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1233                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1234         rt->u.dst.dev = dev;
1235         rt->rt6i_idev = idev;
1236         rt->rt6i_table = table;
1237         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1238
1239 out:
1240         if (dev)
1241                 dev_put(dev);
1242         if (idev)
1243                 in6_dev_put(idev);
1244         if (rt)
1245                 dst_free(&rt->u.dst);
1246         return err;
1247 }
1248
1249 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1250 {
1251         int err;
1252         struct fib6_table *table;
1253
1254         if (rt == &ip6_null_entry)
1255                 return -ENOENT;
1256
1257         table = rt->rt6i_table;
1258         write_lock_bh(&table->tb6_lock);
1259
1260         err = fib6_del(rt, info);
1261         dst_release(&rt->u.dst);
1262
1263         write_unlock_bh(&table->tb6_lock);
1264
1265         return err;
1266 }
1267
1268 int ip6_del_rt(struct rt6_info *rt)
1269 {
1270         struct nl_info info = {};
1271         return __ip6_del_rt(rt, &info);
1272 }
1273
1274 static int ip6_route_del(struct fib6_config *cfg)
1275 {
1276         struct fib6_table *table;
1277         struct fib6_node *fn;
1278         struct rt6_info *rt;
1279         int err = -ESRCH;
1280
1281         table = fib6_get_table(cfg->fc_table);
1282         if (table == NULL)
1283                 return err;
1284
1285         read_lock_bh(&table->tb6_lock);
1286
1287         fn = fib6_locate(&table->tb6_root,
1288                          &cfg->fc_dst, cfg->fc_dst_len,
1289                          &cfg->fc_src, cfg->fc_src_len);
1290
1291         if (fn) {
1292                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1293                         if (cfg->fc_ifindex &&
1294                             (rt->rt6i_dev == NULL ||
1295                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1296                                 continue;
1297                         if (cfg->fc_flags & RTF_GATEWAY &&
1298                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1299                                 continue;
1300                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1301                                 continue;
1302                         dst_hold(&rt->u.dst);
1303                         read_unlock_bh(&table->tb6_lock);
1304
1305                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1306                 }
1307         }
1308         read_unlock_bh(&table->tb6_lock);
1309
1310         return err;
1311 }
1312
1313 /*
1314  *      Handle redirects
1315  */
1316 struct ip6rd_flowi {
1317         struct flowi fl;
1318         struct in6_addr gateway;
1319 };
1320
1321 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1322                                              struct flowi *fl,
1323                                              int flags)
1324 {
1325         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1326         struct rt6_info *rt;
1327         struct fib6_node *fn;
1328
1329         /*
1330          * Get the "current" route for this destination and
1331          * check if the redirect has come from approriate router.
1332          *
1333          * RFC 2461 specifies that redirects should only be
1334          * accepted if they come from the nexthop to the target.
1335          * Due to the way the routes are chosen, this notion
1336          * is a bit fuzzy and one might need to check all possible
1337          * routes.
1338          */
1339
1340         read_lock_bh(&table->tb6_lock);
1341         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1342 restart:
1343         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1344                 /*
1345                  * Current route is on-link; redirect is always invalid.
1346                  *
1347                  * Seems, previous statement is not true. It could
1348                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1349                  * But then router serving it might decide, that we should
1350                  * know truth 8)8) --ANK (980726).
1351                  */
1352                 if (rt6_check_expired(rt))
1353                         continue;
1354                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1355                         continue;
1356                 if (fl->oif != rt->rt6i_dev->ifindex)
1357                         continue;
1358                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1359                         continue;
1360                 break;
1361         }
1362
1363         if (!rt)
1364                 rt = &ip6_null_entry;
1365         BACKTRACK(&fl->fl6_src);
1366 out:
1367         dst_hold(&rt->u.dst);
1368
1369         read_unlock_bh(&table->tb6_lock);
1370
1371         return rt;
1372 };
1373
1374 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1375                                            struct in6_addr *src,
1376                                            struct in6_addr *gateway,
1377                                            struct net_device *dev)
1378 {
1379         int flags = RT6_LOOKUP_F_HAS_SADDR;
1380         struct ip6rd_flowi rdfl = {
1381                 .fl = {
1382                         .oif = dev->ifindex,
1383                         .nl_u = {
1384                                 .ip6_u = {
1385                                         .daddr = *dest,
1386                                         .saddr = *src,
1387                                 },
1388                         },
1389                 },
1390                 .gateway = *gateway,
1391         };
1392
1393         if (rt6_need_strict(dest))
1394                 flags |= RT6_LOOKUP_F_IFACE;
1395
1396         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1397 }
1398
1399 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1400                   struct in6_addr *saddr,
1401                   struct neighbour *neigh, u8 *lladdr, int on_link)
1402 {
1403         struct rt6_info *rt, *nrt = NULL;
1404         struct netevent_redirect netevent;
1405
1406         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1407
1408         if (rt == &ip6_null_entry) {
1409                 if (net_ratelimit())
1410                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1411                                "for redirect target\n");
1412                 goto out;
1413         }
1414
1415         /*
1416          *      We have finally decided to accept it.
1417          */
1418
1419         neigh_update(neigh, lladdr, NUD_STALE,
1420                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1421                      NEIGH_UPDATE_F_OVERRIDE|
1422                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1423                                      NEIGH_UPDATE_F_ISROUTER))
1424                      );
1425
1426         /*
1427          * Redirect received -> path was valid.
1428          * Look, redirects are sent only in response to data packets,
1429          * so that this nexthop apparently is reachable. --ANK
1430          */
1431         dst_confirm(&rt->u.dst);
1432
1433         /* Duplicate redirect: silently ignore. */
1434         if (neigh == rt->u.dst.neighbour)
1435                 goto out;
1436
1437         nrt = ip6_rt_copy(rt);
1438         if (nrt == NULL)
1439                 goto out;
1440
1441         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1442         if (on_link)
1443                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1444
1445         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1446         nrt->rt6i_dst.plen = 128;
1447         nrt->u.dst.flags |= DST_HOST;
1448
1449         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1450         nrt->rt6i_nexthop = neigh_clone(neigh);
1451         /* Reset pmtu, it may be better */
1452         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1453         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1454
1455         if (ip6_ins_rt(nrt))
1456                 goto out;
1457
1458         netevent.old = &rt->u.dst;
1459         netevent.new = &nrt->u.dst;
1460         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1461
1462         if (rt->rt6i_flags&RTF_CACHE) {
1463                 ip6_del_rt(rt);
1464                 return;
1465         }
1466
1467 out:
1468         dst_release(&rt->u.dst);
1469         return;
1470 }
1471
1472 /*
1473  *      Handle ICMP "packet too big" messages
1474  *      i.e. Path MTU discovery
1475  */
1476
1477 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1478                         struct net_device *dev, u32 pmtu)
1479 {
1480         struct rt6_info *rt, *nrt;
1481         int allfrag = 0;
1482
1483         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1484         if (rt == NULL)
1485                 return;
1486
1487         if (pmtu >= dst_mtu(&rt->u.dst))
1488                 goto out;
1489
1490         if (pmtu < IPV6_MIN_MTU) {
1491                 /*
1492                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1493                  * MTU (1280) and a fragment header should always be included
1494                  * after a node receiving Too Big message reporting PMTU is
1495                  * less than the IPv6 Minimum Link MTU.
1496                  */
1497                 pmtu = IPV6_MIN_MTU;
1498                 allfrag = 1;
1499         }
1500
1501         /* New mtu received -> path was valid.
1502            They are sent only in response to data packets,
1503            so that this nexthop apparently is reachable. --ANK
1504          */
1505         dst_confirm(&rt->u.dst);
1506
1507         /* Host route. If it is static, it would be better
1508            not to override it, but add new one, so that
1509            when cache entry will expire old pmtu
1510            would return automatically.
1511          */
1512         if (rt->rt6i_flags & RTF_CACHE) {
1513                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1514                 if (allfrag)
1515                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1516                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1517                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1518                 goto out;
1519         }
1520
1521         /* Network route.
1522            Two cases are possible:
1523            1. It is connected route. Action: COW
1524            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1525          */
1526         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1527                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1528         else
1529                 nrt = rt6_alloc_clone(rt, daddr);
1530
1531         if (nrt) {
1532                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1533                 if (allfrag)
1534                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1535
1536                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1537                  * happened within 5 mins, the recommended timer is 10 mins.
1538                  * Here this route expiration time is set to ip6_rt_mtu_expires
1539                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1540                  * and detecting PMTU increase will be automatically happened.
1541                  */
1542                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1543                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1544
1545                 ip6_ins_rt(nrt);
1546         }
1547 out:
1548         dst_release(&rt->u.dst);
1549 }
1550
1551 /*
1552  *      Misc support functions
1553  */
1554
1555 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1556 {
1557         struct rt6_info *rt = ip6_dst_alloc();
1558
1559         if (rt) {
1560                 rt->u.dst.input = ort->u.dst.input;
1561                 rt->u.dst.output = ort->u.dst.output;
1562
1563                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1564                 rt->u.dst.error = ort->u.dst.error;
1565                 rt->u.dst.dev = ort->u.dst.dev;
1566                 if (rt->u.dst.dev)
1567                         dev_hold(rt->u.dst.dev);
1568                 rt->rt6i_idev = ort->rt6i_idev;
1569                 if (rt->rt6i_idev)
1570                         in6_dev_hold(rt->rt6i_idev);
1571                 rt->u.dst.lastuse = jiffies;
1572                 rt->rt6i_expires = 0;
1573
1574                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1575                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1576                 rt->rt6i_metric = 0;
1577
1578                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1579 #ifdef CONFIG_IPV6_SUBTREES
1580                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1581 #endif
1582                 rt->rt6i_table = ort->rt6i_table;
1583         }
1584         return rt;
1585 }
1586
1587 #ifdef CONFIG_IPV6_ROUTE_INFO
1588 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1589                                            struct in6_addr *gwaddr, int ifindex)
1590 {
1591         struct fib6_node *fn;
1592         struct rt6_info *rt = NULL;
1593         struct fib6_table *table;
1594
1595         table = fib6_get_table(RT6_TABLE_INFO);
1596         if (table == NULL)
1597                 return NULL;
1598
1599         write_lock_bh(&table->tb6_lock);
1600         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1601         if (!fn)
1602                 goto out;
1603
1604         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1605                 if (rt->rt6i_dev->ifindex != ifindex)
1606                         continue;
1607                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1608                         continue;
1609                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1610                         continue;
1611                 dst_hold(&rt->u.dst);
1612                 break;
1613         }
1614 out:
1615         write_unlock_bh(&table->tb6_lock);
1616         return rt;
1617 }
1618
1619 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1620                                            struct in6_addr *gwaddr, int ifindex,
1621                                            unsigned pref)
1622 {
1623         struct fib6_config cfg = {
1624                 .fc_table       = RT6_TABLE_INFO,
1625                 .fc_metric      = 1024,
1626                 .fc_ifindex     = ifindex,
1627                 .fc_dst_len     = prefixlen,
1628                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1629                                   RTF_UP | RTF_PREF(pref),
1630         };
1631
1632         ipv6_addr_copy(&cfg.fc_dst, prefix);
1633         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1634
1635         /* We should treat it as a default route if prefix length is 0. */
1636         if (!prefixlen)
1637                 cfg.fc_flags |= RTF_DEFAULT;
1638
1639         ip6_route_add(&cfg);
1640
1641         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1642 }
1643 #endif
1644
1645 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1646 {
1647         struct rt6_info *rt;
1648         struct fib6_table *table;
1649
1650         table = fib6_get_table(RT6_TABLE_DFLT);
1651         if (table == NULL)
1652                 return NULL;
1653
1654         write_lock_bh(&table->tb6_lock);
1655         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1656                 if (dev == rt->rt6i_dev &&
1657                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1658                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1659                         break;
1660         }
1661         if (rt)
1662                 dst_hold(&rt->u.dst);
1663         write_unlock_bh(&table->tb6_lock);
1664         return rt;
1665 }
1666
1667 EXPORT_SYMBOL(rt6_get_dflt_router);
1668
1669 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1670                                      struct net_device *dev,
1671                                      unsigned int pref)
1672 {
1673         struct fib6_config cfg = {
1674                 .fc_table       = RT6_TABLE_DFLT,
1675                 .fc_metric      = 1024,
1676                 .fc_ifindex     = dev->ifindex,
1677                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1678                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1679         };
1680
1681         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1682
1683         ip6_route_add(&cfg);
1684
1685         return rt6_get_dflt_router(gwaddr, dev);
1686 }
1687
1688 void rt6_purge_dflt_routers(void)
1689 {
1690         struct rt6_info *rt;
1691         struct fib6_table *table;
1692
1693         /* NOTE: Keep consistent with rt6_get_dflt_router */
1694         table = fib6_get_table(RT6_TABLE_DFLT);
1695         if (table == NULL)
1696                 return;
1697
1698 restart:
1699         read_lock_bh(&table->tb6_lock);
1700         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1701                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1702                         dst_hold(&rt->u.dst);
1703                         read_unlock_bh(&table->tb6_lock);
1704                         ip6_del_rt(rt);
1705                         goto restart;
1706                 }
1707         }
1708         read_unlock_bh(&table->tb6_lock);
1709 }
1710
1711 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1712                                  struct fib6_config *cfg)
1713 {
1714         memset(cfg, 0, sizeof(*cfg));
1715
1716         cfg->fc_table = RT6_TABLE_MAIN;
1717         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1718         cfg->fc_metric = rtmsg->rtmsg_metric;
1719         cfg->fc_expires = rtmsg->rtmsg_info;
1720         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1721         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1722         cfg->fc_flags = rtmsg->rtmsg_flags;
1723
1724         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1725         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1726         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1727 }
1728
1729 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1730 {
1731         struct fib6_config cfg;
1732         struct in6_rtmsg rtmsg;
1733         int err;
1734
1735         switch(cmd) {
1736         case SIOCADDRT:         /* Add a route */
1737         case SIOCDELRT:         /* Delete a route */
1738                 if (!capable(CAP_NET_ADMIN))
1739                         return -EPERM;
1740                 err = copy_from_user(&rtmsg, arg,
1741                                      sizeof(struct in6_rtmsg));
1742                 if (err)
1743                         return -EFAULT;
1744
1745                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1746
1747                 rtnl_lock();
1748                 switch (cmd) {
1749                 case SIOCADDRT:
1750                         err = ip6_route_add(&cfg);
1751                         break;
1752                 case SIOCDELRT:
1753                         err = ip6_route_del(&cfg);
1754                         break;
1755                 default:
1756                         err = -EINVAL;
1757                 }
1758                 rtnl_unlock();
1759
1760                 return err;
1761         }
1762
1763         return -EINVAL;
1764 }
1765
1766 /*
1767  *      Drop the packet on the floor
1768  */
1769
1770 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1771                                int ipstats_mib_noroutes)
1772 {
1773         int type;
1774         switch (ipstats_mib_noroutes) {
1775         case IPSTATS_MIB_INNOROUTES:
1776                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1777                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1778                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1779                         break;
1780                 }
1781                 /* FALLTHROUGH */
1782         case IPSTATS_MIB_OUTNOROUTES:
1783                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1784                 break;
1785         }
1786         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1787         kfree_skb(skb);
1788         return 0;
1789 }
1790
1791 static int ip6_pkt_discard(struct sk_buff *skb)
1792 {
1793         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1794 }
1795
1796 static int ip6_pkt_discard_out(struct sk_buff *skb)
1797 {
1798         skb->dev = skb->dst->dev;
1799         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1800 }
1801
1802 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1803
1804 static int ip6_pkt_prohibit(struct sk_buff *skb)
1805 {
1806         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1807 }
1808
1809 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1810 {
1811         skb->dev = skb->dst->dev;
1812         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1813 }
1814
1815 #endif
1816
1817 /*
1818  *      Allocate a dst for local (unicast / anycast) address.
1819  */
1820
1821 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1822                                     const struct in6_addr *addr,
1823                                     int anycast)
1824 {
1825         struct rt6_info *rt = ip6_dst_alloc();
1826
1827         if (rt == NULL)
1828                 return ERR_PTR(-ENOMEM);
1829
1830         dev_hold(init_net.loopback_dev);
1831         in6_dev_hold(idev);
1832
1833         rt->u.dst.flags = DST_HOST;
1834         rt->u.dst.input = ip6_input;
1835         rt->u.dst.output = ip6_output;
1836         rt->rt6i_dev = init_net.loopback_dev;
1837         rt->rt6i_idev = idev;
1838         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1839         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1840         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1841         rt->u.dst.obsolete = -1;
1842
1843         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1844         if (anycast)
1845                 rt->rt6i_flags |= RTF_ANYCAST;
1846         else
1847                 rt->rt6i_flags |= RTF_LOCAL;
1848         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1849         if (rt->rt6i_nexthop == NULL) {
1850                 dst_free(&rt->u.dst);
1851                 return ERR_PTR(-ENOMEM);
1852         }
1853
1854         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1855         rt->rt6i_dst.plen = 128;
1856         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1857
1858         atomic_set(&rt->u.dst.__refcnt, 1);
1859
1860         return rt;
1861 }
1862
1863 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1864 {
1865         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1866             rt != &ip6_null_entry) {
1867                 RT6_TRACE("deleted by ifdown %p\n", rt);
1868                 return -1;
1869         }
1870         return 0;
1871 }
1872
1873 void rt6_ifdown(struct net_device *dev)
1874 {
1875         fib6_clean_all(fib6_ifdown, 0, dev);
1876 }
1877
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1883
1884 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1885 {
1886         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1887         struct inet6_dev *idev;
1888
1889         /* In IPv6 pmtu discovery is not optional,
1890            so that RTAX_MTU lock cannot disable it.
1891            We still use this lock to block changes
1892            caused by addrconf/ndisc.
1893         */
1894
1895         idev = __in6_dev_get(arg->dev);
1896         if (idev == NULL)
1897                 return 0;
1898
1899         /* For administrative MTU increase, there is no way to discover
1900            IPv6 PMTU increase, so PMTU increase should be updated here.
1901            Since RFC 1981 doesn't include administrative MTU increase
1902            update PMTU increase is a MUST. (i.e. jumbo frame)
1903          */
1904         /*
1905            If new MTU is less than route PMTU, this new MTU will be the
1906            lowest MTU in the path, update the route PMTU to reflect PMTU
1907            decreases; if new MTU is greater than route PMTU, and the
1908            old MTU is the lowest MTU in the path, update the route PMTU
1909            to reflect the increase. In this case if the other nodes' MTU
1910            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1911            PMTU discouvery.
1912          */
1913         if (rt->rt6i_dev == arg->dev &&
1914             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1915             (dst_mtu(&rt->u.dst) > arg->mtu ||
1916              (dst_mtu(&rt->u.dst) < arg->mtu &&
1917               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1918                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1919                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1920         }
1921         return 0;
1922 }
1923
1924 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1925 {
1926         struct rt6_mtu_change_arg arg = {
1927                 .dev = dev,
1928                 .mtu = mtu,
1929         };
1930
1931         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1932 }
1933
1934 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1935         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1936         [RTA_OIF]               = { .type = NLA_U32 },
1937         [RTA_IIF]               = { .type = NLA_U32 },
1938         [RTA_PRIORITY]          = { .type = NLA_U32 },
1939         [RTA_METRICS]           = { .type = NLA_NESTED },
1940 };
1941
1942 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1943                               struct fib6_config *cfg)
1944 {
1945         struct rtmsg *rtm;
1946         struct nlattr *tb[RTA_MAX+1];
1947         int err;
1948
1949         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1950         if (err < 0)
1951                 goto errout;
1952
1953         err = -EINVAL;
1954         rtm = nlmsg_data(nlh);
1955         memset(cfg, 0, sizeof(*cfg));
1956
1957         cfg->fc_table = rtm->rtm_table;
1958         cfg->fc_dst_len = rtm->rtm_dst_len;
1959         cfg->fc_src_len = rtm->rtm_src_len;
1960         cfg->fc_flags = RTF_UP;
1961         cfg->fc_protocol = rtm->rtm_protocol;
1962
1963         if (rtm->rtm_type == RTN_UNREACHABLE)
1964                 cfg->fc_flags |= RTF_REJECT;
1965
1966         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1967         cfg->fc_nlinfo.nlh = nlh;
1968
1969         if (tb[RTA_GATEWAY]) {
1970                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1971                 cfg->fc_flags |= RTF_GATEWAY;
1972         }
1973
1974         if (tb[RTA_DST]) {
1975                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1976
1977                 if (nla_len(tb[RTA_DST]) < plen)
1978                         goto errout;
1979
1980                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1981         }
1982
1983         if (tb[RTA_SRC]) {
1984                 int plen = (rtm->rtm_src_len + 7) >> 3;
1985
1986                 if (nla_len(tb[RTA_SRC]) < plen)
1987                         goto errout;
1988
1989                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1990         }
1991
1992         if (tb[RTA_OIF])
1993                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1994
1995         if (tb[RTA_PRIORITY])
1996                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1997
1998         if (tb[RTA_METRICS]) {
1999                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2000                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2001         }
2002
2003         if (tb[RTA_TABLE])
2004                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2005
2006         err = 0;
2007 errout:
2008         return err;
2009 }
2010
2011 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2012 {
2013         struct net *net = skb->sk->sk_net;
2014         struct fib6_config cfg;
2015         int err;
2016
2017         if (net != &init_net)
2018                 return -EINVAL;
2019
2020         err = rtm_to_fib6_config(skb, nlh, &cfg);
2021         if (err < 0)
2022                 return err;
2023
2024         return ip6_route_del(&cfg);
2025 }
2026
2027 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2028 {
2029         struct net *net = skb->sk->sk_net;
2030         struct fib6_config cfg;
2031         int err;
2032
2033         if (net != &init_net)
2034                 return -EINVAL;
2035
2036         err = rtm_to_fib6_config(skb, nlh, &cfg);
2037         if (err < 0)
2038                 return err;
2039
2040         return ip6_route_add(&cfg);
2041 }
2042
2043 static inline size_t rt6_nlmsg_size(void)
2044 {
2045         return NLMSG_ALIGN(sizeof(struct rtmsg))
2046                + nla_total_size(16) /* RTA_SRC */
2047                + nla_total_size(16) /* RTA_DST */
2048                + nla_total_size(16) /* RTA_GATEWAY */
2049                + nla_total_size(16) /* RTA_PREFSRC */
2050                + nla_total_size(4) /* RTA_TABLE */
2051                + nla_total_size(4) /* RTA_IIF */
2052                + nla_total_size(4) /* RTA_OIF */
2053                + nla_total_size(4) /* RTA_PRIORITY */
2054                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2055                + nla_total_size(sizeof(struct rta_cacheinfo));
2056 }
2057
2058 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2059                          struct in6_addr *dst, struct in6_addr *src,
2060                          int iif, int type, u32 pid, u32 seq,
2061                          int prefix, unsigned int flags)
2062 {
2063         struct rtmsg *rtm;
2064         struct nlmsghdr *nlh;
2065         long expires;
2066         u32 table;
2067
2068         if (prefix) {   /* user wants prefix routes only */
2069                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2070                         /* success since this is not a prefix route */
2071                         return 1;
2072                 }
2073         }
2074
2075         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2076         if (nlh == NULL)
2077                 return -EMSGSIZE;
2078
2079         rtm = nlmsg_data(nlh);
2080         rtm->rtm_family = AF_INET6;
2081         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2082         rtm->rtm_src_len = rt->rt6i_src.plen;
2083         rtm->rtm_tos = 0;
2084         if (rt->rt6i_table)
2085                 table = rt->rt6i_table->tb6_id;
2086         else
2087                 table = RT6_TABLE_UNSPEC;
2088         rtm->rtm_table = table;
2089         NLA_PUT_U32(skb, RTA_TABLE, table);
2090         if (rt->rt6i_flags&RTF_REJECT)
2091                 rtm->rtm_type = RTN_UNREACHABLE;
2092         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2093                 rtm->rtm_type = RTN_LOCAL;
2094         else
2095                 rtm->rtm_type = RTN_UNICAST;
2096         rtm->rtm_flags = 0;
2097         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2098         rtm->rtm_protocol = rt->rt6i_protocol;
2099         if (rt->rt6i_flags&RTF_DYNAMIC)
2100                 rtm->rtm_protocol = RTPROT_REDIRECT;
2101         else if (rt->rt6i_flags & RTF_ADDRCONF)
2102                 rtm->rtm_protocol = RTPROT_KERNEL;
2103         else if (rt->rt6i_flags&RTF_DEFAULT)
2104                 rtm->rtm_protocol = RTPROT_RA;
2105
2106         if (rt->rt6i_flags&RTF_CACHE)
2107                 rtm->rtm_flags |= RTM_F_CLONED;
2108
2109         if (dst) {
2110                 NLA_PUT(skb, RTA_DST, 16, dst);
2111                 rtm->rtm_dst_len = 128;
2112         } else if (rtm->rtm_dst_len)
2113                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2114 #ifdef CONFIG_IPV6_SUBTREES
2115         if (src) {
2116                 NLA_PUT(skb, RTA_SRC, 16, src);
2117                 rtm->rtm_src_len = 128;
2118         } else if (rtm->rtm_src_len)
2119                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2120 #endif
2121         if (iif)
2122                 NLA_PUT_U32(skb, RTA_IIF, iif);
2123         else if (dst) {
2124                 struct in6_addr saddr_buf;
2125                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2126                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2127         }
2128
2129         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2130                 goto nla_put_failure;
2131
2132         if (rt->u.dst.neighbour)
2133                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2134
2135         if (rt->u.dst.dev)
2136                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2137
2138         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2139
2140         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2141         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2142                                expires, rt->u.dst.error) < 0)
2143                 goto nla_put_failure;
2144
2145         return nlmsg_end(skb, nlh);
2146
2147 nla_put_failure:
2148         nlmsg_cancel(skb, nlh);
2149         return -EMSGSIZE;
2150 }
2151
2152 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2153 {
2154         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2155         int prefix;
2156
2157         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2158                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2159                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2160         } else
2161                 prefix = 0;
2162
2163         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2164                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2165                      prefix, NLM_F_MULTI);
2166 }
2167
2168 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2169 {
2170         struct net *net = in_skb->sk->sk_net;
2171         struct nlattr *tb[RTA_MAX+1];
2172         struct rt6_info *rt;
2173         struct sk_buff *skb;
2174         struct rtmsg *rtm;
2175         struct flowi fl;
2176         int err, iif = 0;
2177
2178         if (net != &init_net)
2179                 return -EINVAL;
2180
2181         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2182         if (err < 0)
2183                 goto errout;
2184
2185         err = -EINVAL;
2186         memset(&fl, 0, sizeof(fl));
2187
2188         if (tb[RTA_SRC]) {
2189                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2190                         goto errout;
2191
2192                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2193         }
2194
2195         if (tb[RTA_DST]) {
2196                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2197                         goto errout;
2198
2199                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2200         }
2201
2202         if (tb[RTA_IIF])
2203                 iif = nla_get_u32(tb[RTA_IIF]);
2204
2205         if (tb[RTA_OIF])
2206                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2207
2208         if (iif) {
2209                 struct net_device *dev;
2210                 dev = __dev_get_by_index(&init_net, iif);
2211                 if (!dev) {
2212                         err = -ENODEV;
2213                         goto errout;
2214                 }
2215         }
2216
2217         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2218         if (skb == NULL) {
2219                 err = -ENOBUFS;
2220                 goto errout;
2221         }
2222
2223         /* Reserve room for dummy headers, this skb can pass
2224            through good chunk of routing engine.
2225          */
2226         skb_reset_mac_header(skb);
2227         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2228
2229         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2230         skb->dst = &rt->u.dst;
2231
2232         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2233                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2234                             nlh->nlmsg_seq, 0, 0);
2235         if (err < 0) {
2236                 kfree_skb(skb);
2237                 goto errout;
2238         }
2239
2240         err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
2241 errout:
2242         return err;
2243 }
2244
2245 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2246 {
2247         struct sk_buff *skb;
2248         u32 seq;
2249         int err;
2250
2251         err = -ENOBUFS;
2252         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2253
2254         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2255         if (skb == NULL)
2256                 goto errout;
2257
2258         err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2259                                 event, info->pid, seq, 0, 0);
2260         if (err < 0) {
2261                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2262                 WARN_ON(err == -EMSGSIZE);
2263                 kfree_skb(skb);
2264                 goto errout;
2265         }
2266         err = rtnl_notify(skb, &init_net, info->pid,
2267                                 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2268 errout:
2269         if (err < 0)
2270                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2271 }
2272
2273 /*
2274  *      /proc
2275  */
2276
2277 #ifdef CONFIG_PROC_FS
2278
/* NOTE(review): presumably the length of one /proc route line; not used in the code visible here. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* NOTE(review): buffer bookkeeping struct; no user is visible in this part of the file. */
struct rt6_proc_arg {
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2289
2290 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2291 {
2292         struct seq_file *m = p_arg;
2293
2294         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2295                    rt->rt6i_dst.plen);
2296
2297 #ifdef CONFIG_IPV6_SUBTREES
2298         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2299                    rt->rt6i_src.plen);
2300 #else
2301         seq_puts(m, "00000000000000000000000000000000 00 ");
2302 #endif
2303
2304         if (rt->rt6i_nexthop) {
2305                 seq_printf(m, NIP6_SEQFMT,
2306                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2307         } else {
2308                 seq_puts(m, "00000000000000000000000000000000");
2309         }
2310         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2311                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2312                    rt->u.dst.__use, rt->rt6i_flags,
2313                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2314         return 0;
2315 }
2316
2317 static int ipv6_route_show(struct seq_file *m, void *v)
2318 {
2319         fib6_clean_all(rt6_info_route, 0, m);
2320         return 0;
2321 }
2322
2323 static int ipv6_route_open(struct inode *inode, struct file *file)
2324 {
2325         return single_open(file, ipv6_route_show, NULL);
2326 }
2327
2328 static const struct file_operations ipv6_route_proc_fops = {
2329         .owner          = THIS_MODULE,
2330         .open           = ipv6_route_open,
2331         .read           = seq_read,
2332         .llseek         = seq_lseek,
2333         .release        = single_release,
2334 };
2335
2336 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2337 {
2338         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2339                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2340                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2341                       rt6_stats.fib_rt_cache,
2342                       atomic_read(&ip6_dst_ops.entries),
2343                       rt6_stats.fib_discarded_routes);
2344
2345         return 0;
2346 }
2347
2348 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2349 {
2350         return single_open(file, rt6_stats_seq_show, NULL);
2351 }
2352
2353 static const struct file_operations rt6_stats_seq_fops = {
2354         .owner   = THIS_MODULE,
2355         .open    = rt6_stats_seq_open,
2356         .read    = seq_read,
2357         .llseek  = seq_lseek,
2358         .release = single_release,
2359 };
2360
2361 static int ipv6_route_proc_init(struct net *net)
2362 {
2363         int ret = -ENOMEM;
2364         if (!proc_net_fops_create(net, "ipv6_route",
2365                                   0, &ipv6_route_proc_fops))
2366                 goto out;
2367
2368         if (!proc_net_fops_create(net, "rt6_stats",
2369                                   S_IRUGO, &rt6_stats_seq_fops))
2370                 goto out_ipv6_route;
2371
2372         ret = 0;
2373 out:
2374         return ret;
2375 out_ipv6_route:
2376         proc_net_remove(net, "ipv6_route");
2377         goto out;
2378 }
2379
/* Remove the proc entries created by ipv6_route_proc_init() for @net. */
static void ipv6_route_proc_fini(struct net *net)
{
        proc_net_remove(net, "ipv6_route");
        proc_net_remove(net, "rt6_stats");
}
2385 #else
/* Without CONFIG_PROC_FS there is nothing to create; report success. */
static inline int ipv6_route_proc_init(struct net *net)
{
        return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ipv6_route_proc_fini(struct net *net)
{
}
2394 #endif  /* CONFIG_PROC_FS */
2395
2396 #ifdef CONFIG_SYSCTL
2397
2398 static int flush_delay;
2399
2400 static
2401 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2402                               void __user *buffer, size_t *lenp, loff_t *ppos)
2403 {
2404         if (write) {
2405                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2406                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2407                 return 0;
2408         } else
2409                 return -EINVAL;
2410 }
2411
2412 ctl_table ipv6_route_table_template[] = {
2413         {
2414                 .procname       =       "flush",
2415                 .data           =       &flush_delay,
2416                 .maxlen         =       sizeof(int),
2417                 .mode           =       0200,
2418                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2419         },
2420         {
2421                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2422                 .procname       =       "gc_thresh",
2423                 .data           =       &ip6_dst_ops.gc_thresh,
2424                 .maxlen         =       sizeof(int),
2425                 .mode           =       0644,
2426                 .proc_handler   =       &proc_dointvec,
2427         },
2428         {
2429                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2430                 .procname       =       "max_size",
2431                 .data           =       &ip6_rt_max_size,
2432                 .maxlen         =       sizeof(int),
2433                 .mode           =       0644,
2434                 .proc_handler   =       &proc_dointvec,
2435         },
2436         {
2437                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2438                 .procname       =       "gc_min_interval",
2439                 .data           =       &ip6_rt_gc_min_interval,
2440                 .maxlen         =       sizeof(int),
2441                 .mode           =       0644,
2442                 .proc_handler   =       &proc_dointvec_jiffies,
2443                 .strategy       =       &sysctl_jiffies,
2444         },
2445         {
2446                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2447                 .procname       =       "gc_timeout",
2448                 .data           =       &ip6_rt_gc_timeout,
2449                 .maxlen         =       sizeof(int),
2450                 .mode           =       0644,
2451                 .proc_handler   =       &proc_dointvec_jiffies,
2452                 .strategy       =       &sysctl_jiffies,
2453         },
2454         {
2455                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2456                 .procname       =       "gc_interval",
2457                 .data           =       &ip6_rt_gc_interval,
2458                 .maxlen         =       sizeof(int),
2459                 .mode           =       0644,
2460                 .proc_handler   =       &proc_dointvec_jiffies,
2461                 .strategy       =       &sysctl_jiffies,
2462         },
2463         {
2464                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2465                 .procname       =       "gc_elasticity",
2466                 .data           =       &ip6_rt_gc_elasticity,
2467                 .maxlen         =       sizeof(int),
2468                 .mode           =       0644,
2469                 .proc_handler   =       &proc_dointvec_jiffies,
2470                 .strategy       =       &sysctl_jiffies,
2471         },
2472         {
2473                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2474                 .procname       =       "mtu_expires",
2475                 .data           =       &ip6_rt_mtu_expires,
2476                 .maxlen         =       sizeof(int),
2477                 .mode           =       0644,
2478                 .proc_handler   =       &proc_dointvec_jiffies,
2479                 .strategy       =       &sysctl_jiffies,
2480         },
2481         {
2482                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2483                 .procname       =       "min_adv_mss",
2484                 .data           =       &ip6_rt_min_advmss,
2485                 .maxlen         =       sizeof(int),
2486                 .mode           =       0644,
2487                 .proc_handler   =       &proc_dointvec_jiffies,
2488                 .strategy       =       &sysctl_jiffies,
2489         },
2490         {
2491                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2492                 .procname       =       "gc_min_interval_ms",
2493                 .data           =       &ip6_rt_gc_min_interval,
2494                 .maxlen         =       sizeof(int),
2495                 .mode           =       0644,
2496                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2497                 .strategy       =       &sysctl_ms_jiffies,
2498         },
2499         { .ctl_name = 0 }
2500 };
2501
2502 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2503 {
2504         struct ctl_table *table;
2505
2506         table = kmemdup(ipv6_route_table_template,
2507                         sizeof(ipv6_route_table_template),
2508                         GFP_KERNEL);
2509         return table;
2510 }
2511 #endif
2512
2513 int __init ip6_route_init(void)
2514 {
2515         int ret;
2516
2517         ip6_dst_ops.kmem_cachep =
2518                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2519                                   SLAB_HWCACHE_ALIGN, NULL);
2520         if (!ip6_dst_ops.kmem_cachep)
2521                 return -ENOMEM;
2522
2523         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2524
2525         ret = fib6_init();
2526         if (ret)
2527                 goto out_kmem_cache;
2528
2529         ret = ipv6_route_proc_init(&init_net);
2530         if (ret)
2531                 goto out_fib6_init;
2532
2533         ret = xfrm6_init();
2534         if (ret)
2535                 goto out_proc_init;
2536
2537         ret = fib6_rules_init();
2538         if (ret)
2539                 goto xfrm6_init;
2540
2541         ret = -ENOBUFS;
2542         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2543             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2544             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2545                 goto fib6_rules_init;
2546
2547         ret = 0;
2548 out:
2549         return ret;
2550
2551 fib6_rules_init:
2552         fib6_rules_cleanup();
2553 xfrm6_init:
2554         xfrm6_fini();
2555 out_proc_init:
2556         ipv6_route_proc_fini(&init_net);
2557 out_fib6_init:
2558         rt6_ifdown(NULL);
2559         fib6_gc_cleanup();
2560 out_kmem_cache:
2561         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2562         goto out;
2563 }
2564
2565 void ip6_route_cleanup(void)
2566 {
2567         fib6_rules_cleanup();
2568         ipv6_route_proc_fini(&init_net);
2569         xfrm6_fini();
2570         rt6_ifdown(NULL);
2571         fib6_gc_cleanup();
2572         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2573 }