[NETNS][DST] dst: pass the dst_ops as parameter to the gc functions
[safe/jmp/linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
/*
 * RDBG() and RT6_TRACE() expand to printk() calls only when RT6_DEBUG is
 * at least 3; at the default level both expand to nothing, so their
 * arguments are not evaluated.
 */
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
/*
 * Compile-time switch tested with #if in ip6_pol_route(): when 0, the
 * rt6_alloc_clone() branch is compiled out.
 */
74 #define CLONE_OFFLINK_ROUTE 0
75
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void             ip6_dst_destroy(struct dst_entry *);
80 static void             ip6_dst_ifdown(struct dst_entry *,
81                                        struct net_device *dev, int how);
82 static int               ip6_dst_gc(struct dst_ops *ops);
83
84 static int              ip6_pkt_discard(struct sk_buff *skb);
85 static int              ip6_pkt_discard_out(struct sk_buff *skb);
86 static void             ip6_link_failure(struct sk_buff *skb);
87 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
91                                            struct in6_addr *gwaddr, int ifindex,
92                                            unsigned pref);
93 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
94                                            struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .local_out              =       ip6_local_out,
109         .entry_size             =       sizeof(struct rt6_info),
110 };
111
112 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
113 {
114 }
115
116 static struct dst_ops ip6_dst_blackhole_ops = {
117         .family                 =       AF_INET6,
118         .protocol               =       __constant_htons(ETH_P_IPV6),
119         .destroy                =       ip6_dst_destroy,
120         .check                  =       ip6_dst_check,
121         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
122         .entry_size             =       sizeof(struct rt6_info),
123 };
124
125 struct rt6_info ip6_null_entry = {
126         .u = {
127                 .dst = {
128                         .__refcnt       = ATOMIC_INIT(1),
129                         .__use          = 1,
130                         .obsolete       = -1,
131                         .error          = -ENETUNREACH,
132                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
133                         .input          = ip6_pkt_discard,
134                         .output         = ip6_pkt_discard_out,
135                         .ops            = &ip6_dst_ops,
136                         .path           = (struct dst_entry*)&ip6_null_entry,
137                 }
138         },
139         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
140         .rt6i_metric    = ~(u32) 0,
141         .rt6i_ref       = ATOMIC_INIT(1),
142 };
143
144 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
145
146 static int ip6_pkt_prohibit(struct sk_buff *skb);
147 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
148
149 struct rt6_info ip6_prohibit_entry = {
150         .u = {
151                 .dst = {
152                         .__refcnt       = ATOMIC_INIT(1),
153                         .__use          = 1,
154                         .obsolete       = -1,
155                         .error          = -EACCES,
156                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
157                         .input          = ip6_pkt_prohibit,
158                         .output         = ip6_pkt_prohibit_out,
159                         .ops            = &ip6_dst_ops,
160                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
161                 }
162         },
163         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
164         .rt6i_metric    = ~(u32) 0,
165         .rt6i_ref       = ATOMIC_INIT(1),
166 };
167
168 struct rt6_info ip6_blk_hole_entry = {
169         .u = {
170                 .dst = {
171                         .__refcnt       = ATOMIC_INIT(1),
172                         .__use          = 1,
173                         .obsolete       = -1,
174                         .error          = -EINVAL,
175                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
176                         .input          = dst_discard,
177                         .output         = dst_discard,
178                         .ops            = &ip6_dst_ops,
179                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
180                 }
181         },
182         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
183         .rt6i_metric    = ~(u32) 0,
184         .rt6i_ref       = ATOMIC_INIT(1),
185 };
186
187 #endif
188
189 /* allocate dst with ip6_dst_ops */
190 static __inline__ struct rt6_info *ip6_dst_alloc(void)
191 {
192         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193 }
194
195 static void ip6_dst_destroy(struct dst_entry *dst)
196 {
197         struct rt6_info *rt = (struct rt6_info *)dst;
198         struct inet6_dev *idev = rt->rt6i_idev;
199
200         if (idev != NULL) {
201                 rt->rt6i_idev = NULL;
202                 in6_dev_put(idev);
203         }
204 }
205
206 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207                            int how)
208 {
209         struct rt6_info *rt = (struct rt6_info *)dst;
210         struct inet6_dev *idev = rt->rt6i_idev;
211         struct net_device *loopback_dev =
212                 dev->nd_net->loopback_dev;
213
214         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
215                 struct inet6_dev *loopback_idev =
216                         in6_dev_get(loopback_dev);
217                 if (loopback_idev != NULL) {
218                         rt->rt6i_idev = loopback_idev;
219                         in6_dev_put(idev);
220                 }
221         }
222 }
223
224 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225 {
226         return (rt->rt6i_flags & RTF_EXPIRES &&
227                 time_after(jiffies, rt->rt6i_expires));
228 }
229
230 static inline int rt6_need_strict(struct in6_addr *daddr)
231 {
232         return (ipv6_addr_type(daddr) &
233                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
234 }
235
236 /*
237  *      Route lookup. Any table->tb6_lock is implied.
238  */
239
240 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
241                                                     int oif,
242                                                     int strict)
243 {
244         struct rt6_info *local = NULL;
245         struct rt6_info *sprt;
246
247         if (oif) {
248                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249                         struct net_device *dev = sprt->rt6i_dev;
250                         if (dev->ifindex == oif)
251                                 return sprt;
252                         if (dev->flags & IFF_LOOPBACK) {
253                                 if (sprt->rt6i_idev == NULL ||
254                                     sprt->rt6i_idev->dev->ifindex != oif) {
255                                         if (strict && oif)
256                                                 continue;
257                                         if (local && (!oif ||
258                                                       local->rt6i_idev->dev->ifindex == oif))
259                                                 continue;
260                                 }
261                                 local = sprt;
262                         }
263                 }
264
265                 if (local)
266                         return local;
267
268                 if (strict)
269                         return &ip6_null_entry;
270         }
271         return rt;
272 }
273
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here the limit is the per-device rtr_probe_interval, measured
 * from neigh->updated.  A NULL @rt, or a route without a next hop, is a
 * no-op.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr solicited_mc;
	struct in6_addr *target;

	/* Cheap unlocked test first. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* Became valid meanwhile, or probed too recently. */
		read_unlock_bh(&neigh->lock);
		return;
	}

	/* Restart the rate-limit window, then send outside the lock. */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &solicited_mc);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &solicited_mc, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
309
310 /*
311  * Default Router Selection (RFC 2461 6.3.6)
312  */
313 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
314 {
315         struct net_device *dev = rt->rt6i_dev;
316         if (!oif || dev->ifindex == oif)
317                 return 2;
318         if ((dev->flags & IFF_LOOPBACK) &&
319             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
320                 return 1;
321         return 0;
322 }
323
324 static inline int rt6_check_neigh(struct rt6_info *rt)
325 {
326         struct neighbour *neigh = rt->rt6i_nexthop;
327         int m;
328         if (rt->rt6i_flags & RTF_NONEXTHOP ||
329             !(rt->rt6i_flags & RTF_GATEWAY))
330                 m = 1;
331         else if (neigh) {
332                 read_lock_bh(&neigh->lock);
333                 if (neigh->nud_state & NUD_VALID)
334                         m = 2;
335 #ifdef CONFIG_IPV6_ROUTER_PREF
336                 else if (neigh->nud_state & NUD_FAILED)
337                         m = 0;
338 #endif
339                 else
340                         m = 1;
341                 read_unlock_bh(&neigh->lock);
342         } else
343                 m = 0;
344         return m;
345 }
346
347 static int rt6_score_route(struct rt6_info *rt, int oif,
348                            int strict)
349 {
350         int m, n;
351
352         m = rt6_check_dev(rt, oif);
353         if (!m && (strict & RT6_LOOKUP_F_IFACE))
354                 return -1;
355 #ifdef CONFIG_IPV6_ROUTER_PREF
356         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
357 #endif
358         n = rt6_check_neigh(rt);
359         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
360                 return -1;
361         return m;
362 }
363
364 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
365                                    int *mpri, struct rt6_info *match)
366 {
367         int m;
368
369         if (rt6_check_expired(rt))
370                 goto out;
371
372         m = rt6_score_route(rt, oif, strict);
373         if (m < 0)
374                 goto out;
375
376         if (m > *mpri) {
377                 if (strict & RT6_LOOKUP_F_REACHABLE)
378                         rt6_probe(match);
379                 *mpri = m;
380                 match = rt;
381         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
382                 rt6_probe(rt);
383         }
384
385 out:
386         return match;
387 }
388
389 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
390                                      struct rt6_info *rr_head,
391                                      u32 metric, int oif, int strict)
392 {
393         struct rt6_info *rt, *match;
394         int mpri = -1;
395
396         match = NULL;
397         for (rt = rr_head; rt && rt->rt6i_metric == metric;
398              rt = rt->u.dst.rt6_next)
399                 match = find_match(rt, oif, strict, &mpri, match);
400         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
401              rt = rt->u.dst.rt6_next)
402                 match = find_match(rt, oif, strict, &mpri, match);
403
404         return match;
405 }
406
407 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
408 {
409         struct rt6_info *match, *rt0;
410
411         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
412                   __FUNCTION__, fn->leaf, oif);
413
414         rt0 = fn->rr_ptr;
415         if (!rt0)
416                 fn->rr_ptr = rt0 = fn->leaf;
417
418         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
419
420         if (!match &&
421             (strict & RT6_LOOKUP_F_REACHABLE)) {
422                 struct rt6_info *next = rt0->u.dst.rt6_next;
423
424                 /* no entries matched; do round-robin */
425                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
426                         next = fn->leaf;
427
428                 if (next != rt0)
429                         fn->rr_ptr = next;
430         }
431
432         RT6_TRACE("%s() => %p\n",
433                   __FUNCTION__, match);
434
435         return (match ? match : &ip6_null_entry);
436 }
437
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success and -EINVAL when the option fails validation.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* 0xffffffff means infinity; other values are clamped so that
	 * HZ * lifetime below cannot overflow. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws an existing route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
515
/*
 * BACKTRACK(saddr) - walk back up the fib6 tree after a failed selection.
 *
 * This macro is not self-contained: it reads and writes the caller's
 * local variables `rt' and `fn', and jumps to the caller's labels `out'
 * and `restart'.
 *
 * It does nothing unless rt == &ip6_null_entry.  Otherwise it climbs
 * from fn towards the root:
 *   - at a node flagged RTN_TL_ROOT it gives up and jumps to `out';
 *   - if the parent has a subtree that is not the node just left, the
 *     walk continues at the node found in that subtree by
 *     fib6_lookup(..., NULL, saddr); otherwise it continues at the parent;
 *   - as soon as the new node is flagged RTN_RTINFO it jumps to
 *     `restart' so the caller retries selection there.
 */
516 #define BACKTRACK(saddr) \
517 do { \
518         if (rt == &ip6_null_entry) { \
519                 struct fib6_node *pn; \
520                 while (1) { \
521                         if (fn->fn_flags & RTN_TL_ROOT) \
522                                 goto out; \
523                         pn = fn->parent; \
524                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
525                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
526                         else \
527                                 fn = pn; \
528                         if (fn->fn_flags & RTN_RTINFO) \
529                                 goto restart; \
530                 } \
531         } \
532 } while(0)
533
/*
 * Per-table lookup callback passed to fib6_rule_lookup() by rt6_lookup().
 *
 * Finds the fib6 node for the flow's destination/source, picks a route
 * with rt6_device_match() using fl->oif and @flags as the strictness
 * argument, and backtracks towards the root while the result is
 * &ip6_null_entry.  The whole walk runs under the table read lock.
 *
 * Always returns a route (possibly &ip6_null_entry) on which dst_use()
 * has been called.
 * NOTE(review): dst_use() presumably takes a reference, since rt6_lookup()
 * calls dst_release() on the result - confirm against its definition.
 */
534 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
535                                              struct flowi *fl, int flags)
536 {
        /* `fn' and `rt' must keep these names: BACKTRACK() uses them. */
537         struct fib6_node *fn;
538         struct rt6_info *rt;
539
540         read_lock_bh(&table->tb6_lock);
541         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
        /* BACKTRACK() jumps here after moving fn to another node. */
542 restart:
543         rt = fn->leaf;
544         rt = rt6_device_match(rt, fl->oif, flags);
545         BACKTRACK(&fl->fl6_src);
        /* Also reached from BACKTRACK() when the walk hits a tree root. */
546 out:
547         dst_use(&rt->u.dst, jiffies);
548         read_unlock_bh(&table->tb6_lock);
549         return rt;
550
551 }
552
553 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
554                             int oif, int strict)
555 {
556         struct flowi fl = {
557                 .oif = oif,
558                 .nl_u = {
559                         .ip6_u = {
560                                 .daddr = *daddr,
561                         },
562                 },
563         };
564         struct dst_entry *dst;
565         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
566
567         if (saddr) {
568                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
569                 flags |= RT6_LOOKUP_F_HAS_SADDR;
570         }
571
572         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
573         if (dst->error == 0)
574                 return (struct rt6_info *) dst;
575
576         dst_release(dst);
577
578         return NULL;
579 }
580
581 EXPORT_SYMBOL(rt6_lookup);
582
583 /* ip6_ins_rt is called with table->tb6_lock NOT held.
584    It takes a new route entry; if the addition fails for any reason,
585    the route is freed. In any case, if the caller does not hold a
586    reference to it, it may be destroyed.
587  */
588
589 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
590 {
591         int err;
592         struct fib6_table *table;
593
594         table = rt->rt6i_table;
595         write_lock_bh(&table->tb6_lock);
596         err = fib6_add(&table->tb6_root, rt, info);
597         write_unlock_bh(&table->tb6_lock);
598
599         return err;
600 }
601
602 int ip6_ins_rt(struct rt6_info *rt)
603 {
604         struct nl_info info = {
605                 .nl_net = &init_net,
606         };
607         return __ip6_ins_rt(rt, &info);
608 }
609
610 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
611                                       struct in6_addr *saddr)
612 {
613         struct rt6_info *rt;
614
615         /*
616          *      Clone the route.
617          */
618
619         rt = ip6_rt_copy(ort);
620
621         if (rt) {
622                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
623                         if (rt->rt6i_dst.plen != 128 &&
624                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
625                                 rt->rt6i_flags |= RTF_ANYCAST;
626                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
627                 }
628
629                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
630                 rt->rt6i_dst.plen = 128;
631                 rt->rt6i_flags |= RTF_CACHE;
632                 rt->u.dst.flags |= DST_HOST;
633
634 #ifdef CONFIG_IPV6_SUBTREES
635                 if (rt->rt6i_src.plen && saddr) {
636                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
637                         rt->rt6i_src.plen = 128;
638                 }
639 #endif
640
641                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
642
643         }
644
645         return rt;
646 }
647
648 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
649 {
650         struct rt6_info *rt = ip6_rt_copy(ort);
651         if (rt) {
652                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
653                 rt->rt6i_dst.plen = 128;
654                 rt->rt6i_flags |= RTF_CACHE;
655                 rt->u.dst.flags |= DST_HOST;
656                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
657         }
658         return rt;
659 }
660
/*
 * Common per-table lookup used by the input and output policy callbacks.
 *
 * @table: table to search
 * @oif:   interface index to match (callers pass fl->iif or fl->oif)
 * @fl:    flow being routed
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is used here
 *
 * The first pass asks rt6_select() for a reachable router unless
 * ipv6_devconf.forwarding is set.  Every result that reaches the `out'
 * label while `reachable' is still set - the null entry or an RTF_CACHE
 * route - triggers one more lookup without the reachability requirement.
 *
 * A non-cache route that has no next hop and lacks RTF_NONEXTHOP is
 * copied with rt6_alloc_cow() and inserted; if insertion fails the lookup
 * is retried, up to `attempts' times in total.
 *
 * Always returns a route (possibly &ip6_null_entry) on which dst_hold()
 * has been called, with lastuse and __use updated.
 */
661 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
662                                             struct flowi *fl, int flags)
663 {
        /* `fn' and `rt' must keep these names: BACKTRACK() uses them. */
664         struct fib6_node *fn;
665         struct rt6_info *rt, *nrt;
666         int strict = 0;
667         int attempts = 3;
668         int err;
669         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
670
671         strict |= flags & RT6_LOOKUP_F_IFACE;
672
        /* Entered without the table lock held. */
673 relookup:
674         read_lock_bh(&table->tb6_lock);
675
        /* Entered with the read lock already held (see `out'). */
676 restart_2:
677         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
678
        /* BACKTRACK() jumps here after moving fn to another node. */
679 restart:
680         rt = rt6_select(fn, oif, strict | reachable);
681         BACKTRACK(&fl->fl6_src);
682         if (rt == &ip6_null_entry ||
683             rt->rt6i_flags & RTF_CACHE)
684                 goto out;
685
        /* Pin the route before dropping the lock to build a copy. */
686         dst_hold(&rt->u.dst);
687         read_unlock_bh(&table->tb6_lock);
688
689         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
690                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
691         else {
692 #if CLONE_OFFLINK_ROUTE
693                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
694 #else
                /* Return the route itself; the hold taken above is kept. */
695                 goto out2;
696 #endif
697         }
698
        /* Swap the hold from the original to the copy (or null entry). */
699         dst_release(&rt->u.dst);
700         rt = nrt ? : &ip6_null_entry;
701
702         dst_hold(&rt->u.dst);
703         if (nrt) {
704                 err = ip6_ins_rt(nrt);
705                 if (!err)
706                         goto out2;
707         }
708
        /* Out of retries: return whatever rt currently is. */
709         if (--attempts <= 0)
710                 goto out2;
711
712         /*
713          * Race condition! In the gap, when table->tb6_lock was
714          * released someone could insert this route.  Relookup.
715          */
716         dst_release(&rt->u.dst);
717         goto relookup;
718
        /* Reached with the read lock held and no hold on rt yet. */
719 out:
720         if (reachable) {
721                 reachable = 0;
722                 goto restart_2;
723         }
724         dst_hold(&rt->u.dst);
725         read_unlock_bh(&table->tb6_lock);
        /* Reached unlocked, with a hold on rt. */
726 out2:
727         rt->u.dst.lastuse = jiffies;
728         rt->u.dst.__use++;
729
730         return rt;
731 }
732
733 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
734                                             struct flowi *fl, int flags)
735 {
736         return ip6_pol_route(table, fl->iif, fl, flags);
737 }
738
739 void ip6_route_input(struct sk_buff *skb)
740 {
741         struct ipv6hdr *iph = ipv6_hdr(skb);
742         int flags = RT6_LOOKUP_F_HAS_SADDR;
743         struct flowi fl = {
744                 .iif = skb->dev->ifindex,
745                 .nl_u = {
746                         .ip6_u = {
747                                 .daddr = iph->daddr,
748                                 .saddr = iph->saddr,
749                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
750                         },
751                 },
752                 .mark = skb->mark,
753                 .proto = iph->nexthdr,
754         };
755
756         if (rt6_need_strict(&iph->daddr))
757                 flags |= RT6_LOOKUP_F_IFACE;
758
759         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
760 }
761
762 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
763                                              struct flowi *fl, int flags)
764 {
765         return ip6_pol_route(table, fl->oif, fl, flags);
766 }
767
768 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
769 {
770         int flags = 0;
771
772         if (rt6_need_strict(&fl->fl6_dst))
773                 flags |= RT6_LOOKUP_F_IFACE;
774
775         if (!ipv6_addr_any(&fl->fl6_src))
776                 flags |= RT6_LOOKUP_F_HAS_SADDR;
777
778         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
779 }
780
781 EXPORT_SYMBOL(ip6_route_output);
782
783 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
784 {
785         struct rt6_info *ort = (struct rt6_info *) *dstp;
786         struct rt6_info *rt = (struct rt6_info *)
787                 dst_alloc(&ip6_dst_blackhole_ops);
788         struct dst_entry *new = NULL;
789
790         if (rt) {
791                 new = &rt->u.dst;
792
793                 atomic_set(&new->__refcnt, 1);
794                 new->__use = 1;
795                 new->input = dst_discard;
796                 new->output = dst_discard;
797
798                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
799                 new->dev = ort->u.dst.dev;
800                 if (new->dev)
801                         dev_hold(new->dev);
802                 rt->rt6i_idev = ort->rt6i_idev;
803                 if (rt->rt6i_idev)
804                         in6_dev_hold(rt->rt6i_idev);
805                 rt->rt6i_expires = 0;
806
807                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
808                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
809                 rt->rt6i_metric = 0;
810
811                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
812 #ifdef CONFIG_IPV6_SUBTREES
813                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
814 #endif
815
816                 dst_free(new);
817         }
818
819         dst_release(*dstp);
820         *dstp = new;
821         return (new ? 0 : -ENOMEM);
822 }
823 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
824
825 /*
826  *      Destination cache support functions
827  */
828
829 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
830 {
831         struct rt6_info *rt;
832
833         rt = (struct rt6_info *) dst;
834
835         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
836                 return dst;
837
838         return NULL;
839 }
840
841 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
842 {
843         struct rt6_info *rt = (struct rt6_info *) dst;
844
845         if (rt) {
846                 if (rt->rt6i_flags & RTF_CACHE)
847                         ip6_del_rt(rt);
848                 else
849                         dst_release(dst);
850         }
851         return NULL;
852 }
853
854 static void ip6_link_failure(struct sk_buff *skb)
855 {
856         struct rt6_info *rt;
857
858         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
859
860         rt = (struct rt6_info *) skb->dst;
861         if (rt) {
862                 if (rt->rt6i_flags&RTF_CACHE) {
863                         dst_set_expires(&rt->u.dst, 0);
864                         rt->rt6i_flags |= RTF_EXPIRES;
865                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
866                         rt->rt6i_node->fn_sernum = -1;
867         }
868 }
869
870 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
871 {
872         struct rt6_info *rt6 = (struct rt6_info*)dst;
873
874         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
875                 rt6->rt6i_flags |= RTF_MODIFIED;
876                 if (mtu < IPV6_MIN_MTU) {
877                         mtu = IPV6_MIN_MTU;
878                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
879                 }
880                 dst->metrics[RTAX_MTU-1] = mtu;
881                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
882         }
883 }
884
885 static int ipv6_get_mtu(struct net_device *dev);
886
887 static inline unsigned int ipv6_advmss(unsigned int mtu)
888 {
889         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
890
891         if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
892                 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
893
894         /*
895          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
896          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
897          * IPV6_MAXPLEN is also valid and means: "any MSS,
898          * rely only on pmtu discovery"
899          */
900         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
901                 mtu = IPV6_MAXPLEN;
902         return mtu;
903 }
904
/*
 * Head of the singly linked list (chained through dst->next) of dst entries
 * created by ndisc_dst_alloc(); ndisc_dst_gc() walks it and unlinks entries
 * whose reference count has dropped to zero.
 */
905 static struct dst_entry *ndisc_dst_gc_list;
/* Protects ndisc_dst_gc_list; always taken with the _bh lock variants. */
906 static DEFINE_SPINLOCK(ndisc_lock);
907
908 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
909                                   struct neighbour *neigh,
910                                   struct in6_addr *addr,
911                                   int (*output)(struct sk_buff *))
912 {
913         struct rt6_info *rt;
914         struct inet6_dev *idev = in6_dev_get(dev);
915
916         if (unlikely(idev == NULL))
917                 return NULL;
918
919         rt = ip6_dst_alloc();
920         if (unlikely(rt == NULL)) {
921                 in6_dev_put(idev);
922                 goto out;
923         }
924
925         dev_hold(dev);
926         if (neigh)
927                 neigh_hold(neigh);
928         else
929                 neigh = ndisc_get_neigh(dev, addr);
930
931         rt->rt6i_dev      = dev;
932         rt->rt6i_idev     = idev;
933         rt->rt6i_nexthop  = neigh;
934         atomic_set(&rt->u.dst.__refcnt, 1);
935         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
936         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
937         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
938         rt->u.dst.output  = output;
939
940 #if 0   /* there's no chance to use these for ndisc */
941         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
942                                 ? DST_HOST
943                                 : 0;
944         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
945         rt->rt6i_dst.plen = 128;
946 #endif
947
948         spin_lock_bh(&ndisc_lock);
949         rt->u.dst.next = ndisc_dst_gc_list;
950         ndisc_dst_gc_list = &rt->u.dst;
951         spin_unlock_bh(&ndisc_lock);
952
953         fib6_force_start_gc();
954
955 out:
956         return &rt->u.dst;
957 }
958
959 int ndisc_dst_gc(int *more)
960 {
961         struct dst_entry *dst, *next, **pprev;
962         int freed;
963
964         next = NULL;
965         freed = 0;
966
967         spin_lock_bh(&ndisc_lock);
968         pprev = &ndisc_dst_gc_list;
969
970         while ((dst = *pprev) != NULL) {
971                 if (!atomic_read(&dst->__refcnt)) {
972                         *pprev = dst->next;
973                         dst_free(dst);
974                         freed++;
975                 } else {
976                         pprev = &dst->next;
977                         (*more)++;
978                 }
979         }
980
981         spin_unlock_bh(&ndisc_lock);
982
983         return freed;
984 }
985
986 static int ip6_dst_gc(struct dst_ops *ops)
987 {
988         static unsigned expire = 30*HZ;
989         static unsigned long last_gc;
990         unsigned long now = jiffies;
991
992         if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
993             atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
994                 goto out;
995
996         expire++;
997         fib6_run_gc(expire);
998         last_gc = now;
999         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1000                 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1001
1002 out:
1003         expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1004         return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1005 }
1006
1007 /* Clean host part of a prefix. Not necessary in radix tree,
1008    but results in cleaner routing tables.
1009
1010    Remove it only when all the things will work!
1011  */
1012
1013 static int ipv6_get_mtu(struct net_device *dev)
1014 {
1015         int mtu = IPV6_MIN_MTU;
1016         struct inet6_dev *idev;
1017
1018         idev = in6_dev_get(dev);
1019         if (idev) {
1020                 mtu = idev->cnf.mtu6;
1021                 in6_dev_put(idev);
1022         }
1023         return mtu;
1024 }
1025
1026 int ipv6_get_hoplimit(struct net_device *dev)
1027 {
1028         int hoplimit = ipv6_devconf.hop_limit;
1029         struct inet6_dev *idev;
1030
1031         idev = in6_dev_get(dev);
1032         if (idev) {
1033                 hoplimit = idev->cnf.hop_limit;
1034                 in6_dev_put(idev);
1035         }
1036         return hoplimit;
1037 }
1038
1039 /*
1040  *
1041  */
1042
/*
 * Build a route from @cfg and insert it into the FIB table named by
 * cfg->fc_table.
 *
 * @cfg is modified in place: a zero fc_metric is replaced by
 * IP6_RT_PRIO_USER and an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT.
 *
 * Three kinds of routes are produced:
 *  - reject routes (RTF_REJECT requested, or a non-loopback destination via
 *    a loopback device), which are bound to the loopback device and discard
 *    packets with -ENETUNREACH;
 *  - gateway routes (RTF_GATEWAY), whose gateway must be unicast and, when
 *    it is not link-local, must itself be reachable through a non-gateway
 *    route on the same device;
 *  - plain device routes.
 *
 * On success the device and inet6_dev references taken here are stored in
 * the route and the result of __ip6_ins_rt() is returned.  On failure all
 * references taken so far are dropped, the partially built route is freed
 * and a negative errno is returned (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM,
 * -EHOSTUNREACH, or the error from the neighbour lookup).
 */
1043 int ip6_route_add(struct fib6_config *cfg)
1044 {
1045         int err;
1046         struct rt6_info *rt = NULL;
1047         struct net_device *dev = NULL;
1048         struct inet6_dev *idev = NULL;
1049         struct fib6_table *table;
1050         int addr_type;
1051
1052         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1053                 return -EINVAL;
1054 #ifndef CONFIG_IPV6_SUBTREES
1055         if (cfg->fc_src_len)
1056                 return -EINVAL;
1057 #endif
             /*
              * An explicit ifindex pins the route to that device.  The
              * references on dev and idev taken here are either stored in
              * the route or dropped at the out: label.
              */
1058         if (cfg->fc_ifindex) {
1059                 err = -ENODEV;
1060                 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1061                 if (!dev)
1062                         goto out;
1063                 idev = in6_dev_get(dev);
1064                 if (!idev)
1065                         goto out;
1066         }
1067
1068         if (cfg->fc_metric == 0)
1069                 cfg->fc_metric = IP6_RT_PRIO_USER;
1070
1071         table = fib6_new_table(cfg->fc_table);
1072         if (table == NULL) {
1073                 err = -ENOBUFS;
1074                 goto out;
1075         }
1076
1077         rt = ip6_dst_alloc();
1078
1079         if (rt == NULL) {
1080                 err = -ENOMEM;
1081                 goto out;
1082         }
1083
1084         rt->u.dst.obsolete = -1;
1085         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1086
1087         if (cfg->fc_protocol == RTPROT_UNSPEC)
1088                 cfg->fc_protocol = RTPROT_BOOT;
1089         rt->rt6i_protocol = cfg->fc_protocol;
1090
1091         addr_type = ipv6_addr_type(&cfg->fc_dst);
1092
             /* Default handlers; the reject path below replaces both. */
1093         if (addr_type & IPV6_ADDR_MULTICAST)
1094                 rt->u.dst.input = ip6_mc_input;
1095         else
1096                 rt->u.dst.input = ip6_forward;
1097
1098         rt->u.dst.output = ip6_output;
1099
1100         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1101         rt->rt6i_dst.plen = cfg->fc_dst_len;
1102         if (rt->rt6i_dst.plen == 128)
1103                rt->u.dst.flags = DST_HOST;
1104
1105 #ifdef CONFIG_IPV6_SUBTREES
1106         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1107         rt->rt6i_src.plen = cfg->fc_src_len;
1108 #endif
1109
1110         rt->rt6i_metric = cfg->fc_metric;
1111
1112         /* We cannot add true routes via loopback here,
1113            they would result in kernel looping; promote them to reject routes
1114          */
1115         if ((cfg->fc_flags & RTF_REJECT) ||
1116             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1117                 /* hold loopback dev/idev if we haven't done so. */
1118                 if (dev != init_net.loopback_dev) {
1119                         if (dev) {
1120                                 dev_put(dev);
1121                                 in6_dev_put(idev);
1122                         }
1123                         dev = init_net.loopback_dev;
1124                         dev_hold(dev);
1125                         idev = in6_dev_get(dev);
1126                         if (!idev) {
1127                                 err = -ENODEV;
1128                                 goto out;
1129                         }
1130                 }
1131                 rt->u.dst.output = ip6_pkt_discard_out;
1132                 rt->u.dst.input = ip6_pkt_discard;
1133                 rt->u.dst.error = -ENETUNREACH;
1134                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1135                 goto install_route;
1136         }
1137
1138         if (cfg->fc_flags & RTF_GATEWAY) {
1139                 struct in6_addr *gw_addr;
1140                 int gwa_type;
1141
1142                 gw_addr = &cfg->fc_gateway;
1143                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1144                 gwa_type = ipv6_addr_type(gw_addr);
1145
1146                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1147                         struct rt6_info *grt;
1148
1149                         /* IPv6 strictly inhibits using not link-local
1150                            addresses as nexthop address.
1151                            Otherwise, router will not able to send redirects.
1152                            It is very good, but in some (rare!) circumstances
1153                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1154                            some exceptions. --ANK
1155                          */
1156                         err = -EINVAL;
1157                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1158                                 goto out;
1159
1160                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1161
1162                         err = -EHOSTUNREACH;
1163                         if (grt == NULL)
1164                                 goto out;
1165                         if (dev) {
1166                                 if (dev != grt->rt6i_dev) {
1167                                         dst_release(&grt->u.dst);
1168                                         goto out;
1169                                 }
1170                         } else {
                                     /* No device given: adopt the one the gateway route uses. */
1171                                 dev = grt->rt6i_dev;
1172                                 idev = grt->rt6i_idev;
1173                                 dev_hold(dev);
1174                                 in6_dev_hold(grt->rt6i_idev);
1175                         }
                             /*
                              * err is still -EHOSTUNREACH here; it is cleared
                              * only when the route to the gateway is not
                              * itself a gateway route.
                              */
1176                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1177                                 err = 0;
1178                         dst_release(&grt->u.dst);
1179
1180                         if (err)
1181                                 goto out;
1182                 }
1183                 err = -EINVAL;
1184                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1185                         goto out;
1186         }
1187
1188         err = -ENODEV;
1189         if (dev == NULL)
1190                 goto out;
1191
1192         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1193                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1194                 if (IS_ERR(rt->rt6i_nexthop)) {
1195                         err = PTR_ERR(rt->rt6i_nexthop);
1196                         rt->rt6i_nexthop = NULL;
1197                         goto out;
1198                 }
1199         }
1200
1201         rt->rt6i_flags = cfg->fc_flags;
1202
1203 install_route:
             /*
              * Copy caller-supplied metrics.  Attribute type 0 is skipped and
              * any type above RTAX_MAX rejects the whole request.
              */
1204         if (cfg->fc_mx) {
1205                 struct nlattr *nla;
1206                 int remaining;
1207
1208                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1209                         int type = nla_type(nla);
1210
1211                         if (type) {
1212                                 if (type > RTAX_MAX) {
1213                                         err = -EINVAL;
1214                                         goto out;
1215                                 }
1216
1217                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1218                         }
1219                 }
1220         }
1221
             /* Fill in the metrics that were not supplied explicitly. */
1222         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1223                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1224         if (!rt->u.dst.metrics[RTAX_MTU-1])
1225                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1226         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1227                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
             /* The dev/idev references are stored in the route, not dropped. */
1228         rt->u.dst.dev = dev;
1229         rt->rt6i_idev = idev;
1230         rt->rt6i_table = table;
1231         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1232
1233 out:
             /* Error path: undo whatever has been acquired so far. */
1234         if (dev)
1235                 dev_put(dev);
1236         if (idev)
1237                 in6_dev_put(idev);
1238         if (rt)
1239                 dst_free(&rt->u.dst);
1240         return err;
1241 }
1242
1243 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1244 {
1245         int err;
1246         struct fib6_table *table;
1247
1248         if (rt == &ip6_null_entry)
1249                 return -ENOENT;
1250
1251         table = rt->rt6i_table;
1252         write_lock_bh(&table->tb6_lock);
1253
1254         err = fib6_del(rt, info);
1255         dst_release(&rt->u.dst);
1256
1257         write_unlock_bh(&table->tb6_lock);
1258
1259         return err;
1260 }
1261
1262 int ip6_del_rt(struct rt6_info *rt)
1263 {
1264         struct nl_info info = {
1265                 .nl_net = &init_net,
1266         };
1267         return __ip6_del_rt(rt, &info);
1268 }
1269
1270 static int ip6_route_del(struct fib6_config *cfg)
1271 {
1272         struct fib6_table *table;
1273         struct fib6_node *fn;
1274         struct rt6_info *rt;
1275         int err = -ESRCH;
1276
1277         table = fib6_get_table(cfg->fc_table);
1278         if (table == NULL)
1279                 return err;
1280
1281         read_lock_bh(&table->tb6_lock);
1282
1283         fn = fib6_locate(&table->tb6_root,
1284                          &cfg->fc_dst, cfg->fc_dst_len,
1285                          &cfg->fc_src, cfg->fc_src_len);
1286
1287         if (fn) {
1288                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1289                         if (cfg->fc_ifindex &&
1290                             (rt->rt6i_dev == NULL ||
1291                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1292                                 continue;
1293                         if (cfg->fc_flags & RTF_GATEWAY &&
1294                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1295                                 continue;
1296                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1297                                 continue;
1298                         dst_hold(&rt->u.dst);
1299                         read_unlock_bh(&table->tb6_lock);
1300
1301                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1302                 }
1303         }
1304         read_unlock_bh(&table->tb6_lock);
1305
1306         return err;
1307 }
1308
1309 /*
1310  *      Handle redirects
1311  */
/*
 * Flow key used for redirect validation: an ordinary flowi extended with
 * the address that is compared against each candidate route's gateway.
 */
1312 struct ip6rd_flowi {
             /* Kept first: pointers to this struct are cast to and from struct flowi *. */
1313         struct flowi fl;
             /* Compared with rt6i_gateway in __ip6_route_redirect(). */
1314         struct in6_addr gateway;
1315 };
1316
1317 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1318                                              struct flowi *fl,
1319                                              int flags)
1320 {
1321         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1322         struct rt6_info *rt;
1323         struct fib6_node *fn;
1324
1325         /*
1326          * Get the "current" route for this destination and
1327          * check if the redirect has come from approriate router.
1328          *
1329          * RFC 2461 specifies that redirects should only be
1330          * accepted if they come from the nexthop to the target.
1331          * Due to the way the routes are chosen, this notion
1332          * is a bit fuzzy and one might need to check all possible
1333          * routes.
1334          */
1335
1336         read_lock_bh(&table->tb6_lock);
1337         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1338 restart:
1339         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1340                 /*
1341                  * Current route is on-link; redirect is always invalid.
1342                  *
1343                  * Seems, previous statement is not true. It could
1344                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1345                  * But then router serving it might decide, that we should
1346                  * know truth 8)8) --ANK (980726).
1347                  */
1348                 if (rt6_check_expired(rt))
1349                         continue;
1350                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1351                         continue;
1352                 if (fl->oif != rt->rt6i_dev->ifindex)
1353                         continue;
1354                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1355                         continue;
1356                 break;
1357         }
1358
1359         if (!rt)
1360                 rt = &ip6_null_entry;
1361         BACKTRACK(&fl->fl6_src);
1362 out:
1363         dst_hold(&rt->u.dst);
1364
1365         read_unlock_bh(&table->tb6_lock);
1366
1367         return rt;
1368 };
1369
1370 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1371                                            struct in6_addr *src,
1372                                            struct in6_addr *gateway,
1373                                            struct net_device *dev)
1374 {
1375         int flags = RT6_LOOKUP_F_HAS_SADDR;
1376         struct ip6rd_flowi rdfl = {
1377                 .fl = {
1378                         .oif = dev->ifindex,
1379                         .nl_u = {
1380                                 .ip6_u = {
1381                                         .daddr = *dest,
1382                                         .saddr = *src,
1383                                 },
1384                         },
1385                 },
1386                 .gateway = *gateway,
1387         };
1388
1389         if (rt6_need_strict(dest))
1390                 flags |= RT6_LOOKUP_F_IFACE;
1391
1392         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1393 }
1394
/*
 * Process a redirect for destination @dest.
 *
 * @dest:    destination the redirect applies to.
 * @src:     source address used for the route lookup.
 * @saddr:   address passed to ip6_route_redirect() as the gateway that the
 *           current route must use for the redirect to be accepted.
 * @neigh:   neighbour entry of the new next hop; its device selects the
 *           interface for the lookup.
 * @lladdr:  link-layer address handed to neigh_update().
 * @on_link: non-zero when the target is on-link, in which case the new
 *           route is not a gateway route and the neighbour is not marked
 *           as a router.
 *
 * If the lookup yields the null entry the redirect is ignored (with a
 * rate-limited debug message).  Otherwise the neighbour is updated, the
 * current route is confirmed and, unless @neigh already is the route's
 * neighbour, a host (/128) RTF_CACHE copy of the route pointing at @neigh
 * is inserted and NETEVENT_REDIRECT is raised.  A previous RTF_CACHE route
 * is then deleted.
 */
1395 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1396                   struct in6_addr *saddr,
1397                   struct neighbour *neigh, u8 *lladdr, int on_link)
1398 {
1399         struct rt6_info *rt, *nrt = NULL;
1400         struct netevent_redirect netevent;
1401
1402         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1403
1404         if (rt == &ip6_null_entry) {
1405                 if (net_ratelimit())
1406                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1407                                "for redirect target\n");
1408                 goto out;
1409         }
1410
1411         /*
1412          *      We have finally decided to accept it.
1413          */
1414
1415         neigh_update(neigh, lladdr, NUD_STALE,
1416                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1417                      NEIGH_UPDATE_F_OVERRIDE|
1418                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1419                                      NEIGH_UPDATE_F_ISROUTER))
1420                      );
1421
1422         /*
1423          * Redirect received -> path was valid.
1424          * Look, redirects are sent only in response to data packets,
1425          * so that this nexthop apparently is reachable. --ANK
1426          */
1427         dst_confirm(&rt->u.dst);
1428
1429         /* Duplicate redirect: silently ignore. */
1430         if (neigh == rt->u.dst.neighbour)
1431                 goto out;
1432
1433         nrt = ip6_rt_copy(rt);
1434         if (nrt == NULL)
1435                 goto out;
1436
1437         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1438         if (on_link)
1439                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1440
             /* The copy becomes a host route for exactly @dest. */
1441         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1442         nrt->rt6i_dst.plen = 128;
1443         nrt->u.dst.flags |= DST_HOST;
1444
1445         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1446         nrt->rt6i_nexthop = neigh_clone(neigh);
1447         /* Reset pmtu, it may be better */
1448         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1449         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1450
1451         if (ip6_ins_rt(nrt))
1452                 goto out;
1453
1454         netevent.old = &rt->u.dst;
1455         netevent.new = &nrt->u.dst;
1456         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1457
             /*
              * ip6_del_rt() ends in __ip6_del_rt(), which releases a
              * reference on rt itself, so this path returns without going
              * through the dst_release() at out:.
              */
1458         if (rt->rt6i_flags&RTF_CACHE) {
1459                 ip6_del_rt(rt);
1460                 return;
1461         }
1462
1463 out:
1464         dst_release(&rt->u.dst);
1465         return;
1466 }
1467
1468 /*
1469  *      Handle ICMP "packet too big" messages
1470  *      i.e. Path MTU discovery
1471  */
1472
1473 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1474                         struct net_device *dev, u32 pmtu)
1475 {
1476         struct rt6_info *rt, *nrt;
1477         int allfrag = 0;
1478
1479         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1480         if (rt == NULL)
1481                 return;
1482
1483         if (pmtu >= dst_mtu(&rt->u.dst))
1484                 goto out;
1485
1486         if (pmtu < IPV6_MIN_MTU) {
1487                 /*
1488                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1489                  * MTU (1280) and a fragment header should always be included
1490                  * after a node receiving Too Big message reporting PMTU is
1491                  * less than the IPv6 Minimum Link MTU.
1492                  */
1493                 pmtu = IPV6_MIN_MTU;
1494                 allfrag = 1;
1495         }
1496
1497         /* New mtu received -> path was valid.
1498            They are sent only in response to data packets,
1499            so that this nexthop apparently is reachable. --ANK
1500          */
1501         dst_confirm(&rt->u.dst);
1502
1503         /* Host route. If it is static, it would be better
1504            not to override it, but add new one, so that
1505            when cache entry will expire old pmtu
1506            would return automatically.
1507          */
1508         if (rt->rt6i_flags & RTF_CACHE) {
1509                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1510                 if (allfrag)
1511                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1512                 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1513                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1514                 goto out;
1515         }
1516
1517         /* Network route.
1518            Two cases are possible:
1519            1. It is connected route. Action: COW
1520            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1521          */
1522         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1523                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1524         else
1525                 nrt = rt6_alloc_clone(rt, daddr);
1526
1527         if (nrt) {
1528                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1529                 if (allfrag)
1530                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1531
1532                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1533                  * happened within 5 mins, the recommended timer is 10 mins.
1534                  * Here this route expiration time is set to ip6_rt_mtu_expires
1535                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1536                  * and detecting PMTU increase will be automatically happened.
1537                  */
1538                 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1539                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1540
1541                 ip6_ins_rt(nrt);
1542         }
1543 out:
1544         dst_release(&rt->u.dst);
1545 }
1546
1547 /*
1548  *      Misc support functions
1549  */
1550
1551 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1552 {
1553         struct rt6_info *rt = ip6_dst_alloc();
1554
1555         if (rt) {
1556                 rt->u.dst.input = ort->u.dst.input;
1557                 rt->u.dst.output = ort->u.dst.output;
1558
1559                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1560                 rt->u.dst.error = ort->u.dst.error;
1561                 rt->u.dst.dev = ort->u.dst.dev;
1562                 if (rt->u.dst.dev)
1563                         dev_hold(rt->u.dst.dev);
1564                 rt->rt6i_idev = ort->rt6i_idev;
1565                 if (rt->rt6i_idev)
1566                         in6_dev_hold(rt->rt6i_idev);
1567                 rt->u.dst.lastuse = jiffies;
1568                 rt->rt6i_expires = 0;
1569
1570                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1571                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1572                 rt->rt6i_metric = 0;
1573
1574                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1575 #ifdef CONFIG_IPV6_SUBTREES
1576                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1577 #endif
1578                 rt->rt6i_table = ort->rt6i_table;
1579         }
1580         return rt;
1581 }
1582
1583 #ifdef CONFIG_IPV6_ROUTE_INFO
1584 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1585                                            struct in6_addr *gwaddr, int ifindex)
1586 {
1587         struct fib6_node *fn;
1588         struct rt6_info *rt = NULL;
1589         struct fib6_table *table;
1590
1591         table = fib6_get_table(RT6_TABLE_INFO);
1592         if (table == NULL)
1593                 return NULL;
1594
1595         write_lock_bh(&table->tb6_lock);
1596         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1597         if (!fn)
1598                 goto out;
1599
1600         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1601                 if (rt->rt6i_dev->ifindex != ifindex)
1602                         continue;
1603                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1604                         continue;
1605                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1606                         continue;
1607                 dst_hold(&rt->u.dst);
1608                 break;
1609         }
1610 out:
1611         write_unlock_bh(&table->tb6_lock);
1612         return rt;
1613 }
1614
1615 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1616                                            struct in6_addr *gwaddr, int ifindex,
1617                                            unsigned pref)
1618 {
1619         struct fib6_config cfg = {
1620                 .fc_table       = RT6_TABLE_INFO,
1621                 .fc_metric      = 1024,
1622                 .fc_ifindex     = ifindex,
1623                 .fc_dst_len     = prefixlen,
1624                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1625                                   RTF_UP | RTF_PREF(pref),
1626         };
1627
1628         ipv6_addr_copy(&cfg.fc_dst, prefix);
1629         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1630
1631         /* We should treat it as a default route if prefix length is 0. */
1632         if (!prefixlen)
1633                 cfg.fc_flags |= RTF_DEFAULT;
1634
1635         ip6_route_add(&cfg);
1636
1637         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1638 }
1639 #endif
1640
1641 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1642 {
1643         struct rt6_info *rt;
1644         struct fib6_table *table;
1645
1646         table = fib6_get_table(RT6_TABLE_DFLT);
1647         if (table == NULL)
1648                 return NULL;
1649
1650         write_lock_bh(&table->tb6_lock);
1651         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1652                 if (dev == rt->rt6i_dev &&
1653                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1654                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1655                         break;
1656         }
1657         if (rt)
1658                 dst_hold(&rt->u.dst);
1659         write_unlock_bh(&table->tb6_lock);
1660         return rt;
1661 }
1662
1663 EXPORT_SYMBOL(rt6_get_dflt_router);
1664
1665 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1666                                      struct net_device *dev,
1667                                      unsigned int pref)
1668 {
1669         struct fib6_config cfg = {
1670                 .fc_table       = RT6_TABLE_DFLT,
1671                 .fc_metric      = 1024,
1672                 .fc_ifindex     = dev->ifindex,
1673                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1674                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1675         };
1676
1677         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1678
1679         ip6_route_add(&cfg);
1680
1681         return rt6_get_dflt_router(gwaddr, dev);
1682 }
1683
1684 void rt6_purge_dflt_routers(void)
1685 {
1686         struct rt6_info *rt;
1687         struct fib6_table *table;
1688
1689         /* NOTE: Keep consistent with rt6_get_dflt_router */
1690         table = fib6_get_table(RT6_TABLE_DFLT);
1691         if (table == NULL)
1692                 return;
1693
1694 restart:
1695         read_lock_bh(&table->tb6_lock);
1696         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1697                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1698                         dst_hold(&rt->u.dst);
1699                         read_unlock_bh(&table->tb6_lock);
1700                         ip6_del_rt(rt);
1701                         goto restart;
1702                 }
1703         }
1704         read_unlock_bh(&table->tb6_lock);
1705 }
1706
1707 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1708                                  struct fib6_config *cfg)
1709 {
1710         memset(cfg, 0, sizeof(*cfg));
1711
1712         cfg->fc_table = RT6_TABLE_MAIN;
1713         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1714         cfg->fc_metric = rtmsg->rtmsg_metric;
1715         cfg->fc_expires = rtmsg->rtmsg_info;
1716         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1717         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1718         cfg->fc_flags = rtmsg->rtmsg_flags;
1719
1720         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1721         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1722         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1723 }
1724
1725 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1726 {
1727         struct fib6_config cfg;
1728         struct in6_rtmsg rtmsg;
1729         int err;
1730
1731         switch(cmd) {
1732         case SIOCADDRT:         /* Add a route */
1733         case SIOCDELRT:         /* Delete a route */
1734                 if (!capable(CAP_NET_ADMIN))
1735                         return -EPERM;
1736                 err = copy_from_user(&rtmsg, arg,
1737                                      sizeof(struct in6_rtmsg));
1738                 if (err)
1739                         return -EFAULT;
1740
1741                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1742
1743                 rtnl_lock();
1744                 switch (cmd) {
1745                 case SIOCADDRT:
1746                         err = ip6_route_add(&cfg);
1747                         break;
1748                 case SIOCDELRT:
1749                         err = ip6_route_del(&cfg);
1750                         break;
1751                 default:
1752                         err = -EINVAL;
1753                 }
1754                 rtnl_unlock();
1755
1756                 return err;
1757         }
1758
1759         return -EINVAL;
1760 }
1761
1762 /*
1763  *      Drop the packet on the floor
1764  */
1765
1766 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1767 {
1768         int type;
1769         switch (ipstats_mib_noroutes) {
1770         case IPSTATS_MIB_INNOROUTES:
1771                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1772                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1773                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1774                         break;
1775                 }
1776                 /* FALLTHROUGH */
1777         case IPSTATS_MIB_OUTNOROUTES:
1778                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1779                 break;
1780         }
1781         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1782         kfree_skb(skb);
1783         return 0;
1784 }
1785
1786 static int ip6_pkt_discard(struct sk_buff *skb)
1787 {
1788         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1789 }
1790
1791 static int ip6_pkt_discard_out(struct sk_buff *skb)
1792 {
1793         skb->dev = skb->dst->dev;
1794         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1795 }
1796
1797 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1798
1799 static int ip6_pkt_prohibit(struct sk_buff *skb)
1800 {
1801         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1802 }
1803
1804 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1805 {
1806         skb->dev = skb->dst->dev;
1807         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1808 }
1809
1810 #endif
1811
1812 /*
1813  *      Allocate a dst for local (unicast / anycast) address.
1814  */
1815
1816 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1817                                     const struct in6_addr *addr,
1818                                     int anycast)
1819 {
1820         struct rt6_info *rt = ip6_dst_alloc();
1821
1822         if (rt == NULL)
1823                 return ERR_PTR(-ENOMEM);
1824
1825         dev_hold(init_net.loopback_dev);
1826         in6_dev_hold(idev);
1827
1828         rt->u.dst.flags = DST_HOST;
1829         rt->u.dst.input = ip6_input;
1830         rt->u.dst.output = ip6_output;
1831         rt->rt6i_dev = init_net.loopback_dev;
1832         rt->rt6i_idev = idev;
1833         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1834         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1835         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1836         rt->u.dst.obsolete = -1;
1837
1838         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1839         if (anycast)
1840                 rt->rt6i_flags |= RTF_ANYCAST;
1841         else
1842                 rt->rt6i_flags |= RTF_LOCAL;
1843         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1844         if (rt->rt6i_nexthop == NULL) {
1845                 dst_free(&rt->u.dst);
1846                 return ERR_PTR(-ENOMEM);
1847         }
1848
1849         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1850         rt->rt6i_dst.plen = 128;
1851         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1852
1853         atomic_set(&rt->u.dst.__refcnt, 1);
1854
1855         return rt;
1856 }
1857
1858 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1859 {
1860         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1861             rt != &ip6_null_entry) {
1862                 RT6_TRACE("deleted by ifdown %p\n", rt);
1863                 return -1;
1864         }
1865         return 0;
1866 }
1867
1868 void rt6_ifdown(struct net_device *dev)
1869 {
1870         fib6_clean_all(fib6_ifdown, 0, dev);
1871 }
1872
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU */
};
1878
1879 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1880 {
1881         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1882         struct inet6_dev *idev;
1883
1884         /* In IPv6 pmtu discovery is not optional,
1885            so that RTAX_MTU lock cannot disable it.
1886            We still use this lock to block changes
1887            caused by addrconf/ndisc.
1888         */
1889
1890         idev = __in6_dev_get(arg->dev);
1891         if (idev == NULL)
1892                 return 0;
1893
1894         /* For administrative MTU increase, there is no way to discover
1895            IPv6 PMTU increase, so PMTU increase should be updated here.
1896            Since RFC 1981 doesn't include administrative MTU increase
1897            update PMTU increase is a MUST. (i.e. jumbo frame)
1898          */
1899         /*
1900            If new MTU is less than route PMTU, this new MTU will be the
1901            lowest MTU in the path, update the route PMTU to reflect PMTU
1902            decreases; if new MTU is greater than route PMTU, and the
1903            old MTU is the lowest MTU in the path, update the route PMTU
1904            to reflect the increase. In this case if the other nodes' MTU
1905            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1906            PMTU discouvery.
1907          */
1908         if (rt->rt6i_dev == arg->dev &&
1909             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1910             (dst_mtu(&rt->u.dst) > arg->mtu ||
1911              (dst_mtu(&rt->u.dst) < arg->mtu &&
1912               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1913                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1914                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1915         }
1916         return 0;
1917 }
1918
1919 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1920 {
1921         struct rt6_mtu_change_arg arg = {
1922                 .dev = dev,
1923                 .mtu = mtu,
1924         };
1925
1926         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1927 }
1928
1929 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1930         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1931         [RTA_OIF]               = { .type = NLA_U32 },
1932         [RTA_IIF]               = { .type = NLA_U32 },
1933         [RTA_PRIORITY]          = { .type = NLA_U32 },
1934         [RTA_METRICS]           = { .type = NLA_NESTED },
1935 };
1936
1937 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1938                               struct fib6_config *cfg)
1939 {
1940         struct rtmsg *rtm;
1941         struct nlattr *tb[RTA_MAX+1];
1942         int err;
1943
1944         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1945         if (err < 0)
1946                 goto errout;
1947
1948         err = -EINVAL;
1949         rtm = nlmsg_data(nlh);
1950         memset(cfg, 0, sizeof(*cfg));
1951
1952         cfg->fc_table = rtm->rtm_table;
1953         cfg->fc_dst_len = rtm->rtm_dst_len;
1954         cfg->fc_src_len = rtm->rtm_src_len;
1955         cfg->fc_flags = RTF_UP;
1956         cfg->fc_protocol = rtm->rtm_protocol;
1957
1958         if (rtm->rtm_type == RTN_UNREACHABLE)
1959                 cfg->fc_flags |= RTF_REJECT;
1960
1961         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1962         cfg->fc_nlinfo.nlh = nlh;
1963
1964         if (tb[RTA_GATEWAY]) {
1965                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1966                 cfg->fc_flags |= RTF_GATEWAY;
1967         }
1968
1969         if (tb[RTA_DST]) {
1970                 int plen = (rtm->rtm_dst_len + 7) >> 3;
1971
1972                 if (nla_len(tb[RTA_DST]) < plen)
1973                         goto errout;
1974
1975                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1976         }
1977
1978         if (tb[RTA_SRC]) {
1979                 int plen = (rtm->rtm_src_len + 7) >> 3;
1980
1981                 if (nla_len(tb[RTA_SRC]) < plen)
1982                         goto errout;
1983
1984                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1985         }
1986
1987         if (tb[RTA_OIF])
1988                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1989
1990         if (tb[RTA_PRIORITY])
1991                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1992
1993         if (tb[RTA_METRICS]) {
1994                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1995                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1996         }
1997
1998         if (tb[RTA_TABLE])
1999                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2000
2001         err = 0;
2002 errout:
2003         return err;
2004 }
2005
2006 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2007 {
2008         struct net *net = skb->sk->sk_net;
2009         struct fib6_config cfg;
2010         int err;
2011
2012         if (net != &init_net)
2013                 return -EINVAL;
2014
2015         err = rtm_to_fib6_config(skb, nlh, &cfg);
2016         if (err < 0)
2017                 return err;
2018
2019         return ip6_route_del(&cfg);
2020 }
2021
2022 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2023 {
2024         struct net *net = skb->sk->sk_net;
2025         struct fib6_config cfg;
2026         int err;
2027
2028         if (net != &init_net)
2029                 return -EINVAL;
2030
2031         err = rtm_to_fib6_config(skb, nlh, &cfg);
2032         if (err < 0)
2033                 return err;
2034
2035         return ip6_route_add(&cfg);
2036 }
2037
2038 static inline size_t rt6_nlmsg_size(void)
2039 {
2040         return NLMSG_ALIGN(sizeof(struct rtmsg))
2041                + nla_total_size(16) /* RTA_SRC */
2042                + nla_total_size(16) /* RTA_DST */
2043                + nla_total_size(16) /* RTA_GATEWAY */
2044                + nla_total_size(16) /* RTA_PREFSRC */
2045                + nla_total_size(4) /* RTA_TABLE */
2046                + nla_total_size(4) /* RTA_IIF */
2047                + nla_total_size(4) /* RTA_OIF */
2048                + nla_total_size(4) /* RTA_PRIORITY */
2049                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2050                + nla_total_size(sizeof(struct rta_cacheinfo));
2051 }
2052
/*
 *      Append one route message describing @rt to @skb.
 *
 *      @dst, @src: when non-NULL they are emitted as RTA_DST / RTA_SRC
 *                  with a prefix length of 128 instead of the route's own
 *                  prefixes (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 *      @iif:       when non-zero it is emitted as RTA_IIF; otherwise, if
 *                  @dst is given, a source address is selected and
 *                  emitted as RTA_PREFSRC.
 *      @type, @pid, @seq, @flags: passed to nlmsg_put() for the header.
 *      @prefix:    when non-zero only RTF_PREFIX_RT routes are emitted.
 *
 *      Returns 1 when the route was skipped because of @prefix, the
 *      value of nlmsg_end() on success, or -EMSGSIZE when the message
 *      header or an attribute could not be added (a partially built
 *      message is cancelled).
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, unsigned int flags)
{
        struct rtmsg *rtm;
        struct nlmsghdr *nlh;
        long expires;
        u32 table;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        rtm = nlmsg_data(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        if (rt->rt6i_table)
                table = rt->rt6i_table->tb6_id;
        else
                table = RT6_TABLE_UNSPEC;
        /* The table id goes both into the header and into RTA_TABLE. */
        rtm->rtm_table = table;
        NLA_PUT_U32(skb, RTA_TABLE, table);
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* Start from the stored protocol, then let route flags override it. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF)
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;

        if (rt->rt6i_flags&RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        /* A caller-supplied address is reported as a full /128. */
        if (dst) {
                NLA_PUT(skb, RTA_DST, 16, dst);
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
        if (src) {
                NLA_PUT(skb, RTA_SRC, 16, src);
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len)
                NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
        if (iif)
                NLA_PUT_U32(skb, RTA_IIF, iif);
        else if (dst) {
                /* RTA_PREFSRC is only added when a source address is found. */
                struct in6_addr saddr_buf;
                if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
                        NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }

        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
                goto nla_put_failure;

        /* The gateway is taken from the key of the attached neighbour entry. */
        if (rt->u.dst.neighbour)
                NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);

        if (rt->u.dst.dev)
                NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

        NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

        /* Remaining lifetime relative to jiffies; 0 when no expiry is set. */
        expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
        if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
                               expires, rt->u.dst.error) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

        /* Also the jump target of the NLA_PUT*() macros used above. */
nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2146
2147 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2148 {
2149         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2150         int prefix;
2151
2152         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2153                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2154                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2155         } else
2156                 prefix = 0;
2157
2158         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2159                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2160                      prefix, NLM_F_MULTI);
2161 }
2162
/*
 * rtnetlink handler for RTM_GETROUTE: look up the route for the flow
 * described by the request and unicast the answer back to the sender.
 *
 * Only requests from sockets in init_net are accepted.  Returns -EINVAL
 * for other namespaces or for RTA_SRC/RTA_DST attributes shorter than an
 * IPv6 address, -ENODEV if RTA_IIF names no device, -ENOBUFS if the
 * reply skb cannot be allocated, the nlmsg_parse() or rt6_fill_node()
 * error, or else the result of rtnl_unicast().
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
        struct net *net = in_skb->sk->sk_net;
        struct nlattr *tb[RTA_MAX+1];
        struct rt6_info *rt;
        struct sk_buff *skb;
        struct rtmsg *rtm;
        struct flowi fl;
        int err, iif = 0;

        if (net != &init_net)
                return -EINVAL;

        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
        if (err < 0)
                goto errout;

        /* Default error for the attribute length checks below. */
        err = -EINVAL;
        memset(&fl, 0, sizeof(fl));

        if (tb[RTA_SRC]) {
                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
                        goto errout;

                ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
        }

        if (tb[RTA_DST]) {
                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
                        goto errout;

                ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
        }

        if (tb[RTA_IIF])
                iif = nla_get_u32(tb[RTA_IIF]);

        if (tb[RTA_OIF])
                fl.oif = nla_get_u32(tb[RTA_OIF]);

        /*
         * The device is only looked up to validate iif; the lookup result
         * is not used afterwards.
         */
        if (iif) {
                struct net_device *dev;
                dev = __dev_get_by_index(&init_net, iif);
                if (!dev) {
                        err = -ENODEV;
                        goto errout;
                }
        }

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (skb == NULL) {
                err = -ENOBUFS;
                goto errout;
        }

        /* Reserve room for dummy headers, this skb can pass
           through good chunk of routing engine.
         */
        skb_reset_mac_header(skb);
        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

        /*
         * NOTE(review): the looked-up route is attached to the skb,
         * presumably so its reference is dropped together with the skb
         * on every path below - confirm against kfree_skb().
         */
        rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
        skb->dst = &rt->u.dst;

        err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
                            nlh->nlmsg_seq, 0, 0);
        if (err < 0) {
                kfree_skb(skb);
                goto errout;
        }

        err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
errout:
        return err;
}
2239
2240 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2241 {
2242         struct sk_buff *skb;
2243         u32 seq;
2244         int err;
2245
2246         err = -ENOBUFS;
2247         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2248
2249         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2250         if (skb == NULL)
2251                 goto errout;
2252
2253         err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2254                                 event, info->pid, seq, 0, 0);
2255         if (err < 0) {
2256                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2257                 WARN_ON(err == -EMSGSIZE);
2258                 kfree_skb(skb);
2259                 goto errout;
2260         }
2261         err = rtnl_notify(skb, &init_net, info->pid,
2262                                 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2263 errout:
2264         if (err < 0)
2265                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2266 }
2267
2268 /*
2269  *      /proc
2270  */
2271
2272 #ifdef CONFIG_PROC_FS
2273
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is used by
 * the seq_file based /proc handlers that follow; they look like
 * left-overs of an older buffer-based implementation - confirm there
 * are no other users before removing them.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2284
2285 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2286 {
2287         struct seq_file *m = p_arg;
2288
2289         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2290                    rt->rt6i_dst.plen);
2291
2292 #ifdef CONFIG_IPV6_SUBTREES
2293         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2294                    rt->rt6i_src.plen);
2295 #else
2296         seq_puts(m, "00000000000000000000000000000000 00 ");
2297 #endif
2298
2299         if (rt->rt6i_nexthop) {
2300                 seq_printf(m, NIP6_SEQFMT,
2301                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2302         } else {
2303                 seq_puts(m, "00000000000000000000000000000000");
2304         }
2305         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2306                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2307                    rt->u.dst.__use, rt->rt6i_flags,
2308                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2309         return 0;
2310 }
2311
2312 static int ipv6_route_show(struct seq_file *m, void *v)
2313 {
2314         fib6_clean_all(rt6_info_route, 0, m);
2315         return 0;
2316 }
2317
2318 static int ipv6_route_open(struct inode *inode, struct file *file)
2319 {
2320         return single_open(file, ipv6_route_show, NULL);
2321 }
2322
2323 static const struct file_operations ipv6_route_proc_fops = {
2324         .owner          = THIS_MODULE,
2325         .open           = ipv6_route_open,
2326         .read           = seq_read,
2327         .llseek         = seq_lseek,
2328         .release        = single_release,
2329 };
2330
2331 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2332 {
2333         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2334                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2335                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2336                       rt6_stats.fib_rt_cache,
2337                       atomic_read(&ip6_dst_ops.entries),
2338                       rt6_stats.fib_discarded_routes);
2339
2340         return 0;
2341 }
2342
2343 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2344 {
2345         return single_open(file, rt6_stats_seq_show, NULL);
2346 }
2347
2348 static const struct file_operations rt6_stats_seq_fops = {
2349         .owner   = THIS_MODULE,
2350         .open    = rt6_stats_seq_open,
2351         .read    = seq_read,
2352         .llseek  = seq_lseek,
2353         .release = single_release,
2354 };
2355
2356 static int ipv6_route_proc_init(struct net *net)
2357 {
2358         int ret = -ENOMEM;
2359         if (!proc_net_fops_create(net, "ipv6_route",
2360                                   0, &ipv6_route_proc_fops))
2361                 goto out;
2362
2363         if (!proc_net_fops_create(net, "rt6_stats",
2364                                   S_IRUGO, &rt6_stats_seq_fops))
2365                 goto out_ipv6_route;
2366
2367         ret = 0;
2368 out:
2369         return ret;
2370 out_ipv6_route:
2371         proc_net_remove(net, "ipv6_route");
2372         goto out;
2373 }
2374
/* Remove the proc entries created by ipv6_route_proc_init() from @net. */
static void ipv6_route_proc_fini(struct net *net)
{
        static const char ipv6_route_name[] = "ipv6_route";
        static const char rt6_stats_name[] = "rt6_stats";

        proc_net_remove(net, ipv6_route_name);
        proc_net_remove(net, rt6_stats_name);
}
2380 #else
/* Without CONFIG_PROC_FS there is nothing to create; always succeeds. */
static inline int ipv6_route_proc_init(struct net *net)
{
        const int ret = 0;

        return ret;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ipv6_route_proc_fini(struct net *net)
{
}
2389 #endif  /* CONFIG_PROC_FS */
2390
2391 #ifdef CONFIG_SYSCTL
2392
2393 static
2394 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2395                               void __user *buffer, size_t *lenp, loff_t *ppos)
2396 {
2397         int delay = init_net.ipv6.sysctl.flush_delay;
2398         if (write) {
2399                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2400                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
2401                 return 0;
2402         } else
2403                 return -EINVAL;
2404 }
2405
2406 ctl_table ipv6_route_table_template[] = {
2407         {
2408                 .procname       =       "flush",
2409                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2410                 .maxlen         =       sizeof(int),
2411                 .mode           =       0200,
2412                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2413         },
2414         {
2415                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2416                 .procname       =       "gc_thresh",
2417                 .data           =       &ip6_dst_ops.gc_thresh,
2418                 .maxlen         =       sizeof(int),
2419                 .mode           =       0644,
2420                 .proc_handler   =       &proc_dointvec,
2421         },
2422         {
2423                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2424                 .procname       =       "max_size",
2425                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2426                 .maxlen         =       sizeof(int),
2427                 .mode           =       0644,
2428                 .proc_handler   =       &proc_dointvec,
2429         },
2430         {
2431                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2432                 .procname       =       "gc_min_interval",
2433                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2434                 .maxlen         =       sizeof(int),
2435                 .mode           =       0644,
2436                 .proc_handler   =       &proc_dointvec_jiffies,
2437                 .strategy       =       &sysctl_jiffies,
2438         },
2439         {
2440                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2441                 .procname       =       "gc_timeout",
2442                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2443                 .maxlen         =       sizeof(int),
2444                 .mode           =       0644,
2445                 .proc_handler   =       &proc_dointvec_jiffies,
2446                 .strategy       =       &sysctl_jiffies,
2447         },
2448         {
2449                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2450                 .procname       =       "gc_interval",
2451                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2452                 .maxlen         =       sizeof(int),
2453                 .mode           =       0644,
2454                 .proc_handler   =       &proc_dointvec_jiffies,
2455                 .strategy       =       &sysctl_jiffies,
2456         },
2457         {
2458                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2459                 .procname       =       "gc_elasticity",
2460                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2461                 .maxlen         =       sizeof(int),
2462                 .mode           =       0644,
2463                 .proc_handler   =       &proc_dointvec_jiffies,
2464                 .strategy       =       &sysctl_jiffies,
2465         },
2466         {
2467                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2468                 .procname       =       "mtu_expires",
2469                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2470                 .maxlen         =       sizeof(int),
2471                 .mode           =       0644,
2472                 .proc_handler   =       &proc_dointvec_jiffies,
2473                 .strategy       =       &sysctl_jiffies,
2474         },
2475         {
2476                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2477                 .procname       =       "min_adv_mss",
2478                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2479                 .maxlen         =       sizeof(int),
2480                 .mode           =       0644,
2481                 .proc_handler   =       &proc_dointvec_jiffies,
2482                 .strategy       =       &sysctl_jiffies,
2483         },
2484         {
2485                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2486                 .procname       =       "gc_min_interval_ms",
2487                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2488                 .maxlen         =       sizeof(int),
2489                 .mode           =       0644,
2490                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2491                 .strategy       =       &sysctl_ms_jiffies,
2492         },
2493         { .ctl_name = 0 }
2494 };
2495
2496 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2497 {
2498         struct ctl_table *table;
2499
2500         table = kmemdup(ipv6_route_table_template,
2501                         sizeof(ipv6_route_table_template),
2502                         GFP_KERNEL);
2503         return table;
2504 }
2505 #endif
2506
2507 int __init ip6_route_init(void)
2508 {
2509         int ret;
2510
2511         ip6_dst_ops.kmem_cachep =
2512                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2513                                   SLAB_HWCACHE_ALIGN, NULL);
2514         if (!ip6_dst_ops.kmem_cachep)
2515                 return -ENOMEM;
2516
2517         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2518
2519         ret = fib6_init();
2520         if (ret)
2521                 goto out_kmem_cache;
2522
2523         ret = ipv6_route_proc_init(&init_net);
2524         if (ret)
2525                 goto out_fib6_init;
2526
2527         ret = xfrm6_init();
2528         if (ret)
2529                 goto out_proc_init;
2530
2531         ret = fib6_rules_init();
2532         if (ret)
2533                 goto xfrm6_init;
2534
2535         ret = -ENOBUFS;
2536         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2537             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2538             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2539                 goto fib6_rules_init;
2540
2541         ret = 0;
2542 out:
2543         return ret;
2544
2545 fib6_rules_init:
2546         fib6_rules_cleanup();
2547 xfrm6_init:
2548         xfrm6_fini();
2549 out_proc_init:
2550         ipv6_route_proc_fini(&init_net);
2551 out_fib6_init:
2552         rt6_ifdown(NULL);
2553         fib6_gc_cleanup();
2554 out_kmem_cache:
2555         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2556         goto out;
2557 }
2558
/*
 * Tear down what ip6_route_init() set up: policy rules, the proc
 * entries of init_net, xfrm6, all routes (rt6_ifdown(NULL) matches
 * every device), the FIB garbage collector and finally the dst slab
 * cache.
 */
void ip6_route_cleanup(void)
{
        fib6_rules_cleanup();
        ipv6_route_proc_fini(&init_net);
        xfrm6_fini();
        /* NULL selects the routes of all devices. */
        rt6_ifdown(NULL);
        fib6_gc_cleanup();
        kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}