[NETNS][IPV6] rt6_info - move rt6_info structure inside the namespace
[safe/jmp/linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk calls, below it they expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already have a next hop
 * via rt6_alloc_clone(); with 0 such routes are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
76
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void             ip6_dst_destroy(struct dst_entry *);
81 static void             ip6_dst_ifdown(struct dst_entry *,
82                                        struct net_device *dev, int how);
83 static int               ip6_dst_gc(struct dst_ops *ops);
84
85 static int              ip6_pkt_discard(struct sk_buff *skb);
86 static int              ip6_pkt_discard_out(struct sk_buff *skb);
87 static void             ip6_link_failure(struct sk_buff *skb);
88 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Helpers used by rt6_route_rcv() to find or create the route that
 * corresponds to a received Route Information option.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
99
100 static struct dst_ops ip6_dst_ops = {
101         .family                 =       AF_INET6,
102         .protocol               =       __constant_htons(ETH_P_IPV6),
103         .gc                     =       ip6_dst_gc,
104         .gc_thresh              =       1024,
105         .check                  =       ip6_dst_check,
106         .destroy                =       ip6_dst_destroy,
107         .ifdown                 =       ip6_dst_ifdown,
108         .negative_advice        =       ip6_negative_advice,
109         .link_failure           =       ip6_link_failure,
110         .update_pmtu            =       ip6_rt_update_pmtu,
111         .local_out              =       ip6_local_out,
112         .entry_size             =       sizeof(struct rt6_info),
113         .entries                =       ATOMIC_INIT(0),
114 };
115
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119
120 static struct dst_ops ip6_dst_blackhole_ops = {
121         .family                 =       AF_INET6,
122         .protocol               =       __constant_htons(ETH_P_IPV6),
123         .destroy                =       ip6_dst_destroy,
124         .check                  =       ip6_dst_check,
125         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
126         .entry_size             =       sizeof(struct rt6_info),
127         .entries                =       ATOMIC_INIT(0),
128 };
129
130 static struct rt6_info ip6_null_entry_template = {
131         .u = {
132                 .dst = {
133                         .__refcnt       = ATOMIC_INIT(1),
134                         .__use          = 1,
135                         .obsolete       = -1,
136                         .error          = -ENETUNREACH,
137                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
138                         .input          = ip6_pkt_discard,
139                         .output         = ip6_pkt_discard_out,
140                         .ops            = &ip6_dst_ops,
141                 }
142         },
143         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
144         .rt6i_metric    = ~(u32) 0,
145         .rt6i_ref       = ATOMIC_INIT(1),
146 };
147
148 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
149
150 static int ip6_pkt_prohibit(struct sk_buff *skb);
151 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
152
153 struct rt6_info ip6_prohibit_entry_template = {
154         .u = {
155                 .dst = {
156                         .__refcnt       = ATOMIC_INIT(1),
157                         .__use          = 1,
158                         .obsolete       = -1,
159                         .error          = -EACCES,
160                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
161                         .input          = ip6_pkt_prohibit,
162                         .output         = ip6_pkt_prohibit_out,
163                         .ops            = &ip6_dst_ops,
164                 }
165         },
166         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
167         .rt6i_metric    = ~(u32) 0,
168         .rt6i_ref       = ATOMIC_INIT(1),
169 };
170
171 static struct rt6_info ip6_blk_hole_entry_template = {
172         .u = {
173                 .dst = {
174                         .__refcnt       = ATOMIC_INIT(1),
175                         .__use          = 1,
176                         .obsolete       = -1,
177                         .error          = -EINVAL,
178                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
179                         .input          = dst_discard,
180                         .output         = dst_discard,
181                         .ops            = &ip6_dst_ops,
182                 }
183         },
184         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
185         .rt6i_metric    = ~(u32) 0,
186         .rt6i_ref       = ATOMIC_INIT(1),
187 };
188
189 #endif
190
191 /* allocate dst with ip6_dst_ops */
192 static __inline__ struct rt6_info *ip6_dst_alloc(void)
193 {
194         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
195 }
196
197 static void ip6_dst_destroy(struct dst_entry *dst)
198 {
199         struct rt6_info *rt = (struct rt6_info *)dst;
200         struct inet6_dev *idev = rt->rt6i_idev;
201
202         if (idev != NULL) {
203                 rt->rt6i_idev = NULL;
204                 in6_dev_put(idev);
205         }
206 }
207
208 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
209                            int how)
210 {
211         struct rt6_info *rt = (struct rt6_info *)dst;
212         struct inet6_dev *idev = rt->rt6i_idev;
213         struct net_device *loopback_dev =
214                 dev->nd_net->loopback_dev;
215
216         if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
217                 struct inet6_dev *loopback_idev =
218                         in6_dev_get(loopback_dev);
219                 if (loopback_idev != NULL) {
220                         rt->rt6i_idev = loopback_idev;
221                         in6_dev_put(idev);
222                 }
223         }
224 }
225
226 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
227 {
228         return (rt->rt6i_flags & RTF_EXPIRES &&
229                 time_after(jiffies, rt->rt6i_expires));
230 }
231
232 static inline int rt6_need_strict(struct in6_addr *daddr)
233 {
234         return (ipv6_addr_type(daddr) &
235                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
236 }
237
238 /*
239  *      Route lookup. Any table->tb6_lock is implied.
240  */
241
242 static inline struct rt6_info *rt6_device_match(struct net *net,
243                                                     struct rt6_info *rt,
244                                                     int oif,
245                                                     int strict)
246 {
247         struct rt6_info *local = NULL;
248         struct rt6_info *sprt;
249
250         if (oif) {
251                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252                         struct net_device *dev = sprt->rt6i_dev;
253                         if (dev->ifindex == oif)
254                                 return sprt;
255                         if (dev->flags & IFF_LOOPBACK) {
256                                 if (sprt->rt6i_idev == NULL ||
257                                     sprt->rt6i_idev->dev->ifindex != oif) {
258                                         if (strict && oif)
259                                                 continue;
260                                         if (local && (!oif ||
261                                                       local->rt6i_idev->dev->ifindex == oif))
262                                                 continue;
263                                 }
264                                 local = sprt;
265                         }
266                 }
267
268                 if (local)
269                         return local;
270
271                 if (strict)
272                         return net->ipv6.ip6_null_entry;
273         }
274         return rt;
275 }
276
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for @rt's next hop.
 *
 * Does nothing when @rt is NULL, has no next-hop neighbour, or the
 * neighbour is already in a NUD_VALID state.  Otherwise, if more than
 * rtr_probe_interval jiffies have passed since the neighbour entry was
 * last updated, stamp it with the current time and send a neighbour
 * solicitation for its address to the solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the neigh->updated timestamp provides that rate limit.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock has to be taken
	 * for writing: holding it only for reading would let two CPUs pass
	 * the rate-limit test at once and race on the timestamp store.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation with the lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
312
313 /*
314  * Default Router Selection (RFC 2461 6.3.6)
315  */
316 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
317 {
318         struct net_device *dev = rt->rt6i_dev;
319         if (!oif || dev->ifindex == oif)
320                 return 2;
321         if ((dev->flags & IFF_LOOPBACK) &&
322             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
323                 return 1;
324         return 0;
325 }
326
327 static inline int rt6_check_neigh(struct rt6_info *rt)
328 {
329         struct neighbour *neigh = rt->rt6i_nexthop;
330         int m;
331         if (rt->rt6i_flags & RTF_NONEXTHOP ||
332             !(rt->rt6i_flags & RTF_GATEWAY))
333                 m = 1;
334         else if (neigh) {
335                 read_lock_bh(&neigh->lock);
336                 if (neigh->nud_state & NUD_VALID)
337                         m = 2;
338 #ifdef CONFIG_IPV6_ROUTER_PREF
339                 else if (neigh->nud_state & NUD_FAILED)
340                         m = 0;
341 #endif
342                 else
343                         m = 1;
344                 read_unlock_bh(&neigh->lock);
345         } else
346                 m = 0;
347         return m;
348 }
349
350 static int rt6_score_route(struct rt6_info *rt, int oif,
351                            int strict)
352 {
353         int m, n;
354
355         m = rt6_check_dev(rt, oif);
356         if (!m && (strict & RT6_LOOKUP_F_IFACE))
357                 return -1;
358 #ifdef CONFIG_IPV6_ROUTER_PREF
359         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
360 #endif
361         n = rt6_check_neigh(rt);
362         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
363                 return -1;
364         return m;
365 }
366
367 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368                                    int *mpri, struct rt6_info *match)
369 {
370         int m;
371
372         if (rt6_check_expired(rt))
373                 goto out;
374
375         m = rt6_score_route(rt, oif, strict);
376         if (m < 0)
377                 goto out;
378
379         if (m > *mpri) {
380                 if (strict & RT6_LOOKUP_F_REACHABLE)
381                         rt6_probe(match);
382                 *mpri = m;
383                 match = rt;
384         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
385                 rt6_probe(rt);
386         }
387
388 out:
389         return match;
390 }
391
392 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393                                      struct rt6_info *rr_head,
394                                      u32 metric, int oif, int strict)
395 {
396         struct rt6_info *rt, *match;
397         int mpri = -1;
398
399         match = NULL;
400         for (rt = rr_head; rt && rt->rt6i_metric == metric;
401              rt = rt->u.dst.rt6_next)
402                 match = find_match(rt, oif, strict, &mpri, match);
403         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404              rt = rt->u.dst.rt6_next)
405                 match = find_match(rt, oif, strict, &mpri, match);
406
407         return match;
408 }
409
410 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
411 {
412         struct rt6_info *match, *rt0;
413         struct net *net;
414
415         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416                   __FUNCTION__, fn->leaf, oif);
417
418         rt0 = fn->rr_ptr;
419         if (!rt0)
420                 fn->rr_ptr = rt0 = fn->leaf;
421
422         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
423
424         if (!match &&
425             (strict & RT6_LOOKUP_F_REACHABLE)) {
426                 struct rt6_info *next = rt0->u.dst.rt6_next;
427
428                 /* no entries matched; do round-robin */
429                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430                         next = fn->leaf;
431
432                 if (next != rt0)
433                         fn->rr_ptr = next;
434         }
435
436         RT6_TRACE("%s() => %p\n",
437                   __FUNCTION__, match);
438
439         net = rt0->rt6i_dev->nd_net;
440         return (match ? match : net->ipv6.ip6_null_entry);
441 }
442
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then deletes, updates or creates the matching
 * route-info route and refreshes its preference and expiry.
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev->nd_net;
	struct in6_addr prefix_buf, *prefix;
	struct rt6_info *rt;
	unsigned int pref;
	u32 lifetime;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * 0xffffffff means infinity and is kept as is; any other value is
	 * clamped so that HZ * lifetime below avoids arithmetic overflow.
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		/* Full 128-bit prefix is present in the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
522
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed node lookup.
 *
 * This macro is NOT self-contained.  The expanding function must provide:
 *   - a local `struct rt6_info *rt` holding the result for the current node,
 *   - a local `struct fib6_node *fn` holding the current node,
 *   - a label `restart` that re-runs route selection on `fn`,
 *   - a label `out` that finishes the lookup with the current `rt`.
 *
 * If `rt` is the namespace's null entry, walk towards the root: at each
 * step move to the parent, or, when the parent has a subtree that we did
 * not come from, look @saddr up in that subtree.  Jump to `restart` at
 * the first node flagged RTN_RTINFO, or to `out` once a node flagged
 * RTN_TL_ROOT is reached.  If `rt` is not the null entry nothing happens.
 *
 * Both arguments are parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
540
541 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542                                              struct fib6_table *table,
543                                              struct flowi *fl, int flags)
544 {
545         struct fib6_node *fn;
546         struct rt6_info *rt;
547
548         read_lock_bh(&table->tb6_lock);
549         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550 restart:
551         rt = fn->leaf;
552         rt = rt6_device_match(net, rt, fl->oif, flags);
553         BACKTRACK(net, &fl->fl6_src);
554 out:
555         dst_use(&rt->u.dst, jiffies);
556         read_unlock_bh(&table->tb6_lock);
557         return rt;
558
559 }
560
561 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
562                             struct in6_addr *saddr, int oif, int strict)
563 {
564         struct flowi fl = {
565                 .oif = oif,
566                 .nl_u = {
567                         .ip6_u = {
568                                 .daddr = *daddr,
569                         },
570                 },
571         };
572         struct dst_entry *dst;
573         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
574
575         if (saddr) {
576                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577                 flags |= RT6_LOOKUP_F_HAS_SADDR;
578         }
579
580         dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
581         if (dst->error == 0)
582                 return (struct rt6_info *) dst;
583
584         dst_release(dst);
585
586         return NULL;
587 }
588
589 EXPORT_SYMBOL(rt6_lookup);
590
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592    It takes new route entry, the addition fails by any reason the
593    route is freed. In any case, if caller does not hold it, it may
594    be destroyed.
595  */
596
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
598 {
599         int err;
600         struct fib6_table *table;
601
602         table = rt->rt6i_table;
603         write_lock_bh(&table->tb6_lock);
604         err = fib6_add(&table->tb6_root, rt, info);
605         write_unlock_bh(&table->tb6_lock);
606
607         return err;
608 }
609
610 int ip6_ins_rt(struct rt6_info *rt)
611 {
612         struct nl_info info = {
613                 .nl_net = rt->rt6i_dev->nd_net,
614         };
615         return __ip6_ins_rt(rt, &info);
616 }
617
618 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619                                       struct in6_addr *saddr)
620 {
621         struct rt6_info *rt;
622
623         /*
624          *      Clone the route.
625          */
626
627         rt = ip6_rt_copy(ort);
628
629         if (rt) {
630                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631                         if (rt->rt6i_dst.plen != 128 &&
632                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633                                 rt->rt6i_flags |= RTF_ANYCAST;
634                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
635                 }
636
637                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
638                 rt->rt6i_dst.plen = 128;
639                 rt->rt6i_flags |= RTF_CACHE;
640                 rt->u.dst.flags |= DST_HOST;
641
642 #ifdef CONFIG_IPV6_SUBTREES
643                 if (rt->rt6i_src.plen && saddr) {
644                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645                         rt->rt6i_src.plen = 128;
646                 }
647 #endif
648
649                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
651         }
652
653         return rt;
654 }
655
656 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657 {
658         struct rt6_info *rt = ip6_rt_copy(ort);
659         if (rt) {
660                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661                 rt->rt6i_dst.plen = 128;
662                 rt->rt6i_flags |= RTF_CACHE;
663                 rt->u.dst.flags |= DST_HOST;
664                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665         }
666         return rt;
667 }
668
/*
 * ip6_pol_route - per-table route resolution shared by the input and
 * output paths (ip6_pol_route_input() passes fl->iif as @oif,
 * ip6_pol_route_output() passes fl->oif).
 *
 * Selects a route for the flow with rt6_select(), backtracking up the
 * tree while the result is the null entry.  A selected route that is
 * neither the null entry nor already RTF_CACHE is, when it has no next
 * hop and lacks RTF_NONEXTHOP, copied into a host cache route with
 * rt6_alloc_cow() and inserted into the table.  Because the table lock is
 * dropped around that work, a failed insertion triggers a fresh lookup;
 * `attempts` bounds how often this is tried.
 *
 * Unless ipv6_devconf.forwarding is set, the first selection pass demands
 * RT6_LOOKUP_F_REACHABLE; whenever that pass ends at `out`, the lookup is
 * redone once without the flag.
 *
 * Every path reaching `out2` has applied dst_hold() to the route being
 * returned; lastuse and __use are updated there.
 *
 * The names `rt`, `fn`, `restart` and `out` are required by BACKTRACK.
 */
669 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
670                                       struct flowi *fl, int flags)
671 {
672         struct fib6_node *fn;
673         struct rt6_info *rt, *nrt;
674         int strict = 0;
675         int attempts = 3;
676         int err;
677         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
678
679         strict |= flags & RT6_LOOKUP_F_IFACE;
680
681 relookup:
682         read_lock_bh(&table->tb6_lock);
683
684 restart_2:
685         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
686
687 restart:
688         rt = rt6_select(fn, oif, strict | reachable);
689
690         BACKTRACK(net, &fl->fl6_src);
	/* Null entry or an existing cache route: nothing to clone. */
691         if (rt == net->ipv6.ip6_null_entry ||
692             rt->rt6i_flags & RTF_CACHE)
693                 goto out;
694
	/* Pin the route before the table lock is dropped. */
695         dst_hold(&rt->u.dst);
696         read_unlock_bh(&table->tb6_lock);
697
698         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
699                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
700         else {
701 #if CLONE_OFFLINK_ROUTE
702                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
703 #else
		/* Return the selected route itself; it is still held. */
704                 goto out2;
705 #endif
706         }
707
	/* Swap the reference: drop the original, hold the copy (or the null entry). */
708         dst_release(&rt->u.dst);
709         rt = nrt ? : net->ipv6.ip6_null_entry;
710
711         dst_hold(&rt->u.dst);
712         if (nrt) {
713                 err = ip6_ins_rt(nrt);
714                 if (!err)
715                         goto out2;
716         }
717
718         if (--attempts <= 0)
719                 goto out2;
720
721         /*
722          * Race condition! In the gap, when table->tb6_lock was
723          * released someone could insert this route.  Relookup.
724          */
725         dst_release(&rt->u.dst);
726         goto relookup;
727
728 out:
	/* Still under the table read lock here. */
729         if (reachable) {
730                 reachable = 0;
731                 goto restart_2;
732         }
733         dst_hold(&rt->u.dst);
734         read_unlock_bh(&table->tb6_lock);
735 out2:
736         rt->u.dst.lastuse = jiffies;
737         rt->u.dst.__use++;
738
739         return rt;
740 }
741
742 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
743                                             struct flowi *fl, int flags)
744 {
745         return ip6_pol_route(net, table, fl->iif, fl, flags);
746 }
747
748 void ip6_route_input(struct sk_buff *skb)
749 {
750         struct ipv6hdr *iph = ipv6_hdr(skb);
751         struct net *net = skb->dev->nd_net;
752         int flags = RT6_LOOKUP_F_HAS_SADDR;
753         struct flowi fl = {
754                 .iif = skb->dev->ifindex,
755                 .nl_u = {
756                         .ip6_u = {
757                                 .daddr = iph->daddr,
758                                 .saddr = iph->saddr,
759                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
760                         },
761                 },
762                 .mark = skb->mark,
763                 .proto = iph->nexthdr,
764         };
765
766         if (rt6_need_strict(&iph->daddr))
767                 flags |= RT6_LOOKUP_F_IFACE;
768
769         skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
770 }
771
772 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
773                                              struct flowi *fl, int flags)
774 {
775         return ip6_pol_route(net, table, fl->oif, fl, flags);
776 }
777
778 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
779 {
780         int flags = 0;
781
782         if (rt6_need_strict(&fl->fl6_dst))
783                 flags |= RT6_LOOKUP_F_IFACE;
784
785         if (!ipv6_addr_any(&fl->fl6_src))
786                 flags |= RT6_LOOKUP_F_HAS_SADDR;
787
788         return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
789 }
790
791 EXPORT_SYMBOL(ip6_route_output);
792
793 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
794 {
795         struct rt6_info *ort = (struct rt6_info *) *dstp;
796         struct rt6_info *rt = (struct rt6_info *)
797                 dst_alloc(&ip6_dst_blackhole_ops);
798         struct dst_entry *new = NULL;
799
800         if (rt) {
801                 new = &rt->u.dst;
802
803                 atomic_set(&new->__refcnt, 1);
804                 new->__use = 1;
805                 new->input = dst_discard;
806                 new->output = dst_discard;
807
808                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
809                 new->dev = ort->u.dst.dev;
810                 if (new->dev)
811                         dev_hold(new->dev);
812                 rt->rt6i_idev = ort->rt6i_idev;
813                 if (rt->rt6i_idev)
814                         in6_dev_hold(rt->rt6i_idev);
815                 rt->rt6i_expires = 0;
816
817                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
818                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
819                 rt->rt6i_metric = 0;
820
821                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
822 #ifdef CONFIG_IPV6_SUBTREES
823                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
824 #endif
825
826                 dst_free(new);
827         }
828
829         dst_release(*dstp);
830         *dstp = new;
831         return (new ? 0 : -ENOMEM);
832 }
833 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
834
835 /*
836  *      Destination cache support functions
837  */
838
839 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
840 {
841         struct rt6_info *rt;
842
843         rt = (struct rt6_info *) dst;
844
845         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
846                 return dst;
847
848         return NULL;
849 }
850
851 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
852 {
853         struct rt6_info *rt = (struct rt6_info *) dst;
854
855         if (rt) {
856                 if (rt->rt6i_flags & RTF_CACHE)
857                         ip6_del_rt(rt);
858                 else
859                         dst_release(dst);
860         }
861         return NULL;
862 }
863
864 static void ip6_link_failure(struct sk_buff *skb)
865 {
866         struct rt6_info *rt;
867
868         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
869
870         rt = (struct rt6_info *) skb->dst;
871         if (rt) {
872                 if (rt->rt6i_flags&RTF_CACHE) {
873                         dst_set_expires(&rt->u.dst, 0);
874                         rt->rt6i_flags |= RTF_EXPIRES;
875                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
876                         rt->rt6i_node->fn_sernum = -1;
877         }
878 }
879
880 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
881 {
882         struct rt6_info *rt6 = (struct rt6_info*)dst;
883
884         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
885                 rt6->rt6i_flags |= RTF_MODIFIED;
886                 if (mtu < IPV6_MIN_MTU) {
887                         mtu = IPV6_MIN_MTU;
888                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
889                 }
890                 dst->metrics[RTAX_MTU-1] = mtu;
891                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
892         }
893 }
894
895 static int ipv6_get_mtu(struct net_device *dev);
896
897 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
898 {
899         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
900
901         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
902                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
903
904         /*
905          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
906          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
907          * IPV6_MAXPLEN is also valid and means: "any MSS,
908          * rely only on pmtu discovery"
909          */
910         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
911                 mtu = IPV6_MAXPLEN;
912         return mtu;
913 }
914
915 static struct dst_entry *icmp6_dst_gc_list; /* head of the list (linked through dst->next) of entries made by icmp6_dst_alloc() and reaped by icmp6_dst_gc() */
916 static DEFINE_SPINLOCK(icmp6_dst_lock); /* taken (BH-disabling) around every access to icmp6_dst_gc_list */
917
918 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
919                                   struct neighbour *neigh,
920                                   struct in6_addr *addr)
921 {
922         struct rt6_info *rt;
923         struct inet6_dev *idev = in6_dev_get(dev);
924         struct net *net = dev->nd_net;
925
926         if (unlikely(idev == NULL))
927                 return NULL;
928
929         rt = ip6_dst_alloc();
930         if (unlikely(rt == NULL)) {
931                 in6_dev_put(idev);
932                 goto out;
933         }
934
935         dev_hold(dev);
936         if (neigh)
937                 neigh_hold(neigh);
938         else
939                 neigh = ndisc_get_neigh(dev, addr);
940
941         rt->rt6i_dev      = dev;
942         rt->rt6i_idev     = idev;
943         rt->rt6i_nexthop  = neigh;
944         atomic_set(&rt->u.dst.__refcnt, 1);
945         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
946         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
947         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
948         rt->u.dst.output  = ip6_output;
949
950 #if 0   /* there's no chance to use these for ndisc */
951         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
952                                 ? DST_HOST
953                                 : 0;
954         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
955         rt->rt6i_dst.plen = 128;
956 #endif
957
958         spin_lock_bh(&icmp6_dst_lock);
959         rt->u.dst.next = icmp6_dst_gc_list;
960         icmp6_dst_gc_list = &rt->u.dst;
961         spin_unlock_bh(&icmp6_dst_lock);
962
963         fib6_force_start_gc(net);
964
965 out:
966         return &rt->u.dst;
967 }
968
969 int icmp6_dst_gc(int *more)
970 {
971         struct dst_entry *dst, *next, **pprev;
972         int freed;
973
974         next = NULL;
975         freed = 0;
976
977         spin_lock_bh(&icmp6_dst_lock);
978         pprev = &icmp6_dst_gc_list;
979
980         while ((dst = *pprev) != NULL) {
981                 if (!atomic_read(&dst->__refcnt)) {
982                         *pprev = dst->next;
983                         dst_free(dst);
984                         freed++;
985                 } else {
986                         pprev = &dst->next;
987                         (*more)++;
988                 }
989         }
990
991         spin_unlock_bh(&icmp6_dst_lock);
992
993         return freed;
994 }
995
996 static int ip6_dst_gc(struct dst_ops *ops)
997 {
998         static unsigned expire = 30*HZ;
999         static unsigned long last_gc;
1000         unsigned long now = jiffies;
1001
1002         if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1003             atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1004                 goto out;
1005
1006         expire++;
1007         fib6_run_gc(expire, &init_net);
1008         last_gc = now;
1009         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1010                 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1011
1012 out:
1013         expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1014         return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1015 }
1016
1017 /* Clean host part of a prefix. Not necessary in radix tree,
1018    but results in cleaner routing tables.
1019
1020    Remove it only when all the things will work!
1021  */
1022
1023 static int ipv6_get_mtu(struct net_device *dev)
1024 {
1025         int mtu = IPV6_MIN_MTU;
1026         struct inet6_dev *idev;
1027
1028         idev = in6_dev_get(dev);
1029         if (idev) {
1030                 mtu = idev->cnf.mtu6;
1031                 in6_dev_put(idev);
1032         }
1033         return mtu;
1034 }
1035
1036 int ipv6_get_hoplimit(struct net_device *dev)
1037 {
1038         int hoplimit = ipv6_devconf.hop_limit;
1039         struct inet6_dev *idev;
1040
1041         idev = in6_dev_get(dev);
1042         if (idev) {
1043                 hoplimit = idev->cnf.hop_limit;
1044                 in6_dev_put(idev);
1045         }
1046         return hoplimit;
1047 }
1048
1049 /*
1050  *
1051  */
1052
1053 int ip6_route_add(struct fib6_config *cfg)
1054 {
1055         int err;
1056         struct net *net = cfg->fc_nlinfo.nl_net;
1057         struct rt6_info *rt = NULL;
1058         struct net_device *dev = NULL;
1059         struct inet6_dev *idev = NULL;
1060         struct fib6_table *table;
1061         int addr_type;
1062
1063         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1064                 return -EINVAL;
1065 #ifndef CONFIG_IPV6_SUBTREES
1066         if (cfg->fc_src_len)
1067                 return -EINVAL;
1068 #endif
1069         if (cfg->fc_ifindex) {
1070                 err = -ENODEV;
1071                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1072                 if (!dev)
1073                         goto out;
1074                 idev = in6_dev_get(dev);
1075                 if (!idev)
1076                         goto out;
1077         }
1078
1079         if (cfg->fc_metric == 0)
1080                 cfg->fc_metric = IP6_RT_PRIO_USER;
1081
1082         table = fib6_new_table(net, cfg->fc_table);
1083         if (table == NULL) {
1084                 err = -ENOBUFS;
1085                 goto out;
1086         }
1087
1088         rt = ip6_dst_alloc();
1089
1090         if (rt == NULL) {
1091                 err = -ENOMEM;
1092                 goto out;
1093         }
1094
1095         rt->u.dst.obsolete = -1;
1096         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1097
1098         if (cfg->fc_protocol == RTPROT_UNSPEC)
1099                 cfg->fc_protocol = RTPROT_BOOT;
1100         rt->rt6i_protocol = cfg->fc_protocol;
1101
1102         addr_type = ipv6_addr_type(&cfg->fc_dst);
1103
1104         if (addr_type & IPV6_ADDR_MULTICAST)
1105                 rt->u.dst.input = ip6_mc_input;
1106         else
1107                 rt->u.dst.input = ip6_forward;
1108
1109         rt->u.dst.output = ip6_output;
1110
1111         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112         rt->rt6i_dst.plen = cfg->fc_dst_len;
1113         if (rt->rt6i_dst.plen == 128)
1114                rt->u.dst.flags = DST_HOST;
1115
1116 #ifdef CONFIG_IPV6_SUBTREES
1117         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118         rt->rt6i_src.plen = cfg->fc_src_len;
1119 #endif
1120
1121         rt->rt6i_metric = cfg->fc_metric;
1122
1123         /* We cannot add true routes via loopback here,
1124            they would result in kernel looping; promote them to reject routes
1125          */
1126         if ((cfg->fc_flags & RTF_REJECT) ||
1127             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128                 /* hold loopback dev/idev if we haven't done so. */
1129                 if (dev != net->loopback_dev) {
1130                         if (dev) {
1131                                 dev_put(dev);
1132                                 in6_dev_put(idev);
1133                         }
1134                         dev = net->loopback_dev;
1135                         dev_hold(dev);
1136                         idev = in6_dev_get(dev);
1137                         if (!idev) {
1138                                 err = -ENODEV;
1139                                 goto out;
1140                         }
1141                 }
1142                 rt->u.dst.output = ip6_pkt_discard_out;
1143                 rt->u.dst.input = ip6_pkt_discard;
1144                 rt->u.dst.error = -ENETUNREACH;
1145                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146                 goto install_route;
1147         }
1148
1149         if (cfg->fc_flags & RTF_GATEWAY) {
1150                 struct in6_addr *gw_addr;
1151                 int gwa_type;
1152
1153                 gw_addr = &cfg->fc_gateway;
1154                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1155                 gwa_type = ipv6_addr_type(gw_addr);
1156
1157                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158                         struct rt6_info *grt;
1159
1160                         /* IPv6 strictly inhibits using not link-local
1161                            addresses as nexthop address.
1162                            Otherwise, router will not able to send redirects.
1163                            It is very good, but in some (rare!) circumstances
1164                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1165                            some exceptions. --ANK
1166                          */
1167                         err = -EINVAL;
1168                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1169                                 goto out;
1170
1171                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1172
1173                         err = -EHOSTUNREACH;
1174                         if (grt == NULL)
1175                                 goto out;
1176                         if (dev) {
1177                                 if (dev != grt->rt6i_dev) {
1178                                         dst_release(&grt->u.dst);
1179                                         goto out;
1180                                 }
1181                         } else {
1182                                 dev = grt->rt6i_dev;
1183                                 idev = grt->rt6i_idev;
1184                                 dev_hold(dev);
1185                                 in6_dev_hold(grt->rt6i_idev);
1186                         }
1187                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1188                                 err = 0;
1189                         dst_release(&grt->u.dst);
1190
1191                         if (err)
1192                                 goto out;
1193                 }
1194                 err = -EINVAL;
1195                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196                         goto out;
1197         }
1198
1199         err = -ENODEV;
1200         if (dev == NULL)
1201                 goto out;
1202
1203         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1204                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205                 if (IS_ERR(rt->rt6i_nexthop)) {
1206                         err = PTR_ERR(rt->rt6i_nexthop);
1207                         rt->rt6i_nexthop = NULL;
1208                         goto out;
1209                 }
1210         }
1211
1212         rt->rt6i_flags = cfg->fc_flags;
1213
1214 install_route:
1215         if (cfg->fc_mx) {
1216                 struct nlattr *nla;
1217                 int remaining;
1218
1219                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1220                         int type = nla_type(nla);
1221
1222                         if (type) {
1223                                 if (type > RTAX_MAX) {
1224                                         err = -EINVAL;
1225                                         goto out;
1226                                 }
1227
1228                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1229                         }
1230                 }
1231         }
1232
1233         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235         if (!rt->u.dst.metrics[RTAX_MTU-1])
1236                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1238                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1239         rt->u.dst.dev = dev;
1240         rt->rt6i_idev = idev;
1241         rt->rt6i_table = table;
1242
1243         cfg->fc_nlinfo.nl_net = dev->nd_net;
1244
1245         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1246
1247 out:
1248         if (dev)
1249                 dev_put(dev);
1250         if (idev)
1251                 in6_dev_put(idev);
1252         if (rt)
1253                 dst_free(&rt->u.dst);
1254         return err;
1255 }
1256
1257 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1258 {
1259         int err;
1260         struct fib6_table *table;
1261         struct net *net = rt->rt6i_dev->nd_net;
1262
1263         if (rt == net->ipv6.ip6_null_entry)
1264                 return -ENOENT;
1265
1266         table = rt->rt6i_table;
1267         write_lock_bh(&table->tb6_lock);
1268
1269         err = fib6_del(rt, info);
1270         dst_release(&rt->u.dst);
1271
1272         write_unlock_bh(&table->tb6_lock);
1273
1274         return err;
1275 }
1276
1277 int ip6_del_rt(struct rt6_info *rt)
1278 {
1279         struct nl_info info = {
1280                 .nl_net = rt->rt6i_dev->nd_net,
1281         };
1282         return __ip6_del_rt(rt, &info);
1283 }
1284
1285 static int ip6_route_del(struct fib6_config *cfg)
1286 {
1287         struct fib6_table *table;
1288         struct fib6_node *fn;
1289         struct rt6_info *rt;
1290         int err = -ESRCH;
1291
1292         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1293         if (table == NULL)
1294                 return err;
1295
1296         read_lock_bh(&table->tb6_lock);
1297
1298         fn = fib6_locate(&table->tb6_root,
1299                          &cfg->fc_dst, cfg->fc_dst_len,
1300                          &cfg->fc_src, cfg->fc_src_len);
1301
1302         if (fn) {
1303                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1304                         if (cfg->fc_ifindex &&
1305                             (rt->rt6i_dev == NULL ||
1306                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1307                                 continue;
1308                         if (cfg->fc_flags & RTF_GATEWAY &&
1309                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1310                                 continue;
1311                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1312                                 continue;
1313                         dst_hold(&rt->u.dst);
1314                         read_unlock_bh(&table->tb6_lock);
1315
1316                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1317                 }
1318         }
1319         read_unlock_bh(&table->tb6_lock);
1320
1321         return err;
1322 }
1323
1324 /*
1325  *      Handle redirects
1326  */
1327 struct ip6rd_flowi {                /* lookup key for redirects: a flow plus the redirecting gateway */
1328         struct flowi fl;            /* must stay first: ip6_route_redirect() passes this struct cast to struct flowi *, and __ip6_route_redirect() casts it back */
1329         struct in6_addr gateway;    /* compared against rt6i_gateway of each candidate route */
1330 };
1331
1332 static struct rt6_info *__ip6_route_redirect(struct net *net,
1333                                              struct fib6_table *table,
1334                                              struct flowi *fl,
1335                                              int flags)
1336 {
1337         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1338         struct rt6_info *rt;
1339         struct fib6_node *fn;
1340
1341         /*
1342          * Get the "current" route for this destination and
1343          * check if the redirect has come from approriate router.
1344          *
1345          * RFC 2461 specifies that redirects should only be
1346          * accepted if they come from the nexthop to the target.
1347          * Due to the way the routes are chosen, this notion
1348          * is a bit fuzzy and one might need to check all possible
1349          * routes.
1350          */
1351
1352         read_lock_bh(&table->tb6_lock);
1353         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1354 restart:
1355         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1356                 /*
1357                  * Current route is on-link; redirect is always invalid.
1358                  *
1359                  * Seems, previous statement is not true. It could
1360                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1361                  * But then router serving it might decide, that we should
1362                  * know truth 8)8) --ANK (980726).
1363                  */
1364                 if (rt6_check_expired(rt))
1365                         continue;
1366                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1367                         continue;
1368                 if (fl->oif != rt->rt6i_dev->ifindex)
1369                         continue;
1370                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1371                         continue;
1372                 break;
1373         }
1374
1375         if (!rt)
1376                 rt = net->ipv6.ip6_null_entry;
1377         BACKTRACK(net, &fl->fl6_src);
1378 out:
1379         dst_hold(&rt->u.dst);
1380
1381         read_unlock_bh(&table->tb6_lock);
1382
1383         return rt;
1384 };
1385
1386 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1387                                            struct in6_addr *src,
1388                                            struct in6_addr *gateway,
1389                                            struct net_device *dev)
1390 {
1391         int flags = RT6_LOOKUP_F_HAS_SADDR;
1392         struct net *net = dev->nd_net;
1393         struct ip6rd_flowi rdfl = {
1394                 .fl = {
1395                         .oif = dev->ifindex,
1396                         .nl_u = {
1397                                 .ip6_u = {
1398                                         .daddr = *dest,
1399                                         .saddr = *src,
1400                                 },
1401                         },
1402                 },
1403                 .gateway = *gateway,
1404         };
1405
1406         if (rt6_need_strict(dest))
1407                 flags |= RT6_LOOKUP_F_IFACE;
1408
1409         return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1410                                                    flags, __ip6_route_redirect);
1411 }
1412
1413 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1414                   struct in6_addr *saddr,
1415                   struct neighbour *neigh, u8 *lladdr, int on_link)
1416 {
1417         struct rt6_info *rt, *nrt = NULL;
1418         struct netevent_redirect netevent;
1419         struct net *net = neigh->dev->nd_net;
1420
1421         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1422
1423         if (rt == net->ipv6.ip6_null_entry) {
1424                 if (net_ratelimit())
1425                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1426                                "for redirect target\n");
1427                 goto out;
1428         }
1429
1430         /*
1431          *      We have finally decided to accept it.
1432          */
1433
1434         neigh_update(neigh, lladdr, NUD_STALE,
1435                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1436                      NEIGH_UPDATE_F_OVERRIDE|
1437                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1438                                      NEIGH_UPDATE_F_ISROUTER))
1439                      );
1440
1441         /*
1442          * Redirect received -> path was valid.
1443          * Look, redirects are sent only in response to data packets,
1444          * so that this nexthop apparently is reachable. --ANK
1445          */
1446         dst_confirm(&rt->u.dst);
1447
1448         /* Duplicate redirect: silently ignore. */
1449         if (neigh == rt->u.dst.neighbour)
1450                 goto out;
1451
1452         nrt = ip6_rt_copy(rt);
1453         if (nrt == NULL)
1454                 goto out;
1455
1456         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1457         if (on_link)
1458                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1459
1460         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1461         nrt->rt6i_dst.plen = 128;
1462         nrt->u.dst.flags |= DST_HOST;
1463
1464         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1465         nrt->rt6i_nexthop = neigh_clone(neigh);
1466         /* Reset pmtu, it may be better */
1467         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1468         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1469                                                         dst_mtu(&nrt->u.dst));
1470
1471         if (ip6_ins_rt(nrt))
1472                 goto out;
1473
1474         netevent.old = &rt->u.dst;
1475         netevent.new = &nrt->u.dst;
1476         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1477
1478         if (rt->rt6i_flags&RTF_CACHE) {
1479                 ip6_del_rt(rt);
1480                 return;
1481         }
1482
1483 out:
1484         dst_release(&rt->u.dst);
1485         return;
1486 }
1487
1488 /*
1489  *      Handle ICMP "packet too big" messages
1490  *      i.e. Path MTU discovery
1491  */
1492
1493 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1494                         struct net_device *dev, u32 pmtu)
1495 {
1496         struct rt6_info *rt, *nrt;
1497         struct net *net = dev->nd_net;
1498         int allfrag = 0;
1499
1500         rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1501         if (rt == NULL)
1502                 return;
1503
1504         if (pmtu >= dst_mtu(&rt->u.dst))
1505                 goto out;
1506
1507         if (pmtu < IPV6_MIN_MTU) {
1508                 /*
1509                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1510                  * MTU (1280) and a fragment header should always be included
1511                  * after a node receiving Too Big message reporting PMTU is
1512                  * less than the IPv6 Minimum Link MTU.
1513                  */
1514                 pmtu = IPV6_MIN_MTU;
1515                 allfrag = 1;
1516         }
1517
1518         /* New mtu received -> path was valid.
1519            They are sent only in response to data packets,
1520            so that this nexthop apparently is reachable. --ANK
1521          */
1522         dst_confirm(&rt->u.dst);
1523
1524         /* Host route. If it is static, it would be better
1525            not to override it, but add new one, so that
1526            when cache entry will expire old pmtu
1527            would return automatically.
1528          */
1529         if (rt->rt6i_flags & RTF_CACHE) {
1530                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1531                 if (allfrag)
1532                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1533                 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1534                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1535                 goto out;
1536         }
1537
1538         /* Network route.
1539            Two cases are possible:
1540            1. It is connected route. Action: COW
1541            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1542          */
1543         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1544                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1545         else
1546                 nrt = rt6_alloc_clone(rt, daddr);
1547
1548         if (nrt) {
1549                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1550                 if (allfrag)
1551                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1552
1553                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1554                  * happened within 5 mins, the recommended timer is 10 mins.
1555                  * Here this route expiration time is set to ip6_rt_mtu_expires
1556                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1557                  * and detecting PMTU increase will be automatically happened.
1558                  */
1559                 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1560                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1561
1562                 ip6_ins_rt(nrt);
1563         }
1564 out:
1565         dst_release(&rt->u.dst);
1566 }
1567
1568 /*
1569  *      Misc support functions
1570  */
1571
/*
 * Allocate a new route and copy the essentials of @ort into it.
 *
 * Copied: input/output handlers, metrics, error, device and inet6_dev
 * (a reference is taken on each when set), gateway, flags without
 * RTF_EXPIRES, destination key (and source key with
 * CONFIG_IPV6_SUBTREES) and table.  The copy starts with metric 0 and no
 * expiry.  Returns NULL when the allocation fails.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (!rt)
		return NULL;

	rt->u.dst.input = ort->u.dst.input;
	rt->u.dst.output = ort->u.dst.output;

	memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
	rt->u.dst.error = ort->u.dst.error;

	rt->u.dst.dev = ort->u.dst.dev;
	if (rt->u.dst.dev)
		dev_hold(rt->u.dst.dev);

	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);

	rt->u.dst.lastuse = jiffies;
	rt->rt6i_expires = 0;

	ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
	rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
	rt->rt6i_metric = 0;

	memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
	memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	rt->rt6i_table = ort->rt6i_table;

	return rt;
}
1603
1604 #ifdef CONFIG_IPV6_ROUTE_INFO
1605 static struct rt6_info *rt6_get_route_info(struct net *net,
1606                                            struct in6_addr *prefix, int prefixlen,
1607                                            struct in6_addr *gwaddr, int ifindex)
1608 {
1609         struct fib6_node *fn;
1610         struct rt6_info *rt = NULL;
1611         struct fib6_table *table;
1612
1613         table = fib6_get_table(net, RT6_TABLE_INFO);
1614         if (table == NULL)
1615                 return NULL;
1616
1617         write_lock_bh(&table->tb6_lock);
1618         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1619         if (!fn)
1620                 goto out;
1621
1622         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1623                 if (rt->rt6i_dev->ifindex != ifindex)
1624                         continue;
1625                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1626                         continue;
1627                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1628                         continue;
1629                 dst_hold(&rt->u.dst);
1630                 break;
1631         }
1632 out:
1633         write_unlock_bh(&table->tb6_lock);
1634         return rt;
1635 }
1636
1637 static struct rt6_info *rt6_add_route_info(struct net *net,
1638                                            struct in6_addr *prefix, int prefixlen,
1639                                            struct in6_addr *gwaddr, int ifindex,
1640                                            unsigned pref)
1641 {
1642         struct fib6_config cfg = {
1643                 .fc_table       = RT6_TABLE_INFO,
1644                 .fc_metric      = IP6_RT_PRIO_USER,
1645                 .fc_ifindex     = ifindex,
1646                 .fc_dst_len     = prefixlen,
1647                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1648                                   RTF_UP | RTF_PREF(pref),
1649                 .fc_nlinfo.pid = 0,
1650                 .fc_nlinfo.nlh = NULL,
1651                 .fc_nlinfo.nl_net = net,
1652         };
1653
1654         ipv6_addr_copy(&cfg.fc_dst, prefix);
1655         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1656
1657         /* We should treat it as a default route if prefix length is 0. */
1658         if (!prefixlen)
1659                 cfg.fc_flags |= RTF_DEFAULT;
1660
1661         ip6_route_add(&cfg);
1662
1663         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1664 }
1665 #endif
1666
1667 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1668 {
1669         struct rt6_info *rt;
1670         struct fib6_table *table;
1671
1672         table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1673         if (table == NULL)
1674                 return NULL;
1675
1676         write_lock_bh(&table->tb6_lock);
1677         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1678                 if (dev == rt->rt6i_dev &&
1679                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1680                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1681                         break;
1682         }
1683         if (rt)
1684                 dst_hold(&rt->u.dst);
1685         write_unlock_bh(&table->tb6_lock);
1686         return rt;
1687 }
1688
1689 EXPORT_SYMBOL(rt6_get_dflt_router);
1690
1691 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1692                                      struct net_device *dev,
1693                                      unsigned int pref)
1694 {
1695         struct fib6_config cfg = {
1696                 .fc_table       = RT6_TABLE_DFLT,
1697                 .fc_metric      = IP6_RT_PRIO_USER,
1698                 .fc_ifindex     = dev->ifindex,
1699                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1700                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1701                 .fc_nlinfo.pid = 0,
1702                 .fc_nlinfo.nlh = NULL,
1703                 .fc_nlinfo.nl_net = dev->nd_net,
1704         };
1705
1706         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1707
1708         ip6_route_add(&cfg);
1709
1710         return rt6_get_dflt_router(gwaddr, dev);
1711 }
1712
1713 void rt6_purge_dflt_routers(struct net *net)
1714 {
1715         struct rt6_info *rt;
1716         struct fib6_table *table;
1717
1718         /* NOTE: Keep consistent with rt6_get_dflt_router */
1719         table = fib6_get_table(net, RT6_TABLE_DFLT);
1720         if (table == NULL)
1721                 return;
1722
1723 restart:
1724         read_lock_bh(&table->tb6_lock);
1725         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1726                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1727                         dst_hold(&rt->u.dst);
1728                         read_unlock_bh(&table->tb6_lock);
1729                         ip6_del_rt(rt);
1730                         goto restart;
1731                 }
1732         }
1733         read_unlock_bh(&table->tb6_lock);
1734 }
1735
/*
 * Translate an ioctl-style struct in6_rtmsg into a struct fib6_config.
 *
 * @cfg is zeroed first; the route always targets RT6_TABLE_MAIN in
 * namespace @net, and rtmsg_info is used as the expiry value.
 */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_nlinfo.nl_net = net;
	cfg->fc_table = RT6_TABLE_MAIN;

	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);

	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);

	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
1756
1757 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1758 {
1759         struct fib6_config cfg;
1760         struct in6_rtmsg rtmsg;
1761         int err;
1762
1763         switch(cmd) {
1764         case SIOCADDRT:         /* Add a route */
1765         case SIOCDELRT:         /* Delete a route */
1766                 if (!capable(CAP_NET_ADMIN))
1767                         return -EPERM;
1768                 err = copy_from_user(&rtmsg, arg,
1769                                      sizeof(struct in6_rtmsg));
1770                 if (err)
1771                         return -EFAULT;
1772
1773                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1774
1775                 rtnl_lock();
1776                 switch (cmd) {
1777                 case SIOCADDRT:
1778                         err = ip6_route_add(&cfg);
1779                         break;
1780                 case SIOCDELRT:
1781                         err = ip6_route_del(&cfg);
1782                         break;
1783                 default:
1784                         err = -EINVAL;
1785                 }
1786                 rtnl_unlock();
1787
1788                 return err;
1789         }
1790
1791         return -EINVAL;
1792 }
1793
1794 /*
1795  *      Drop the packet on the floor
1796  */
1797
1798 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1799 {
1800         int type;
1801         switch (ipstats_mib_noroutes) {
1802         case IPSTATS_MIB_INNOROUTES:
1803                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1804                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1805                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1806                         break;
1807                 }
1808                 /* FALLTHROUGH */
1809         case IPSTATS_MIB_OUTNOROUTES:
1810                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1811                 break;
1812         }
1813         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1814         kfree_skb(skb);
1815         return 0;
1816 }
1817
1818 static int ip6_pkt_discard(struct sk_buff *skb)
1819 {
1820         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1821 }
1822
1823 static int ip6_pkt_discard_out(struct sk_buff *skb)
1824 {
1825         skb->dev = skb->dst->dev;
1826         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1827 }
1828
1829 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1830
1831 static int ip6_pkt_prohibit(struct sk_buff *skb)
1832 {
1833         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1834 }
1835
1836 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1837 {
1838         skb->dev = skb->dst->dev;
1839         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1840 }
1841
1842 #endif
1843
1844 /*
1845  *      Allocate a dst for local (unicast / anycast) address.
1846  */
1847
1848 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1849                                     const struct in6_addr *addr,
1850                                     int anycast)
1851 {
1852         struct net *net = idev->dev->nd_net;
1853         struct rt6_info *rt = ip6_dst_alloc();
1854
1855         if (rt == NULL)
1856                 return ERR_PTR(-ENOMEM);
1857
1858         dev_hold(net->loopback_dev);
1859         in6_dev_hold(idev);
1860
1861         rt->u.dst.flags = DST_HOST;
1862         rt->u.dst.input = ip6_input;
1863         rt->u.dst.output = ip6_output;
1864         rt->rt6i_dev = net->loopback_dev;
1865         rt->rt6i_idev = idev;
1866         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1867         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1868         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1869         rt->u.dst.obsolete = -1;
1870
1871         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1872         if (anycast)
1873                 rt->rt6i_flags |= RTF_ANYCAST;
1874         else
1875                 rt->rt6i_flags |= RTF_LOCAL;
1876         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1877         if (rt->rt6i_nexthop == NULL) {
1878                 dst_free(&rt->u.dst);
1879                 return ERR_PTR(-ENOMEM);
1880         }
1881
1882         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1883         rt->rt6i_dst.plen = 128;
1884         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1885
1886         atomic_set(&rt->u.dst.__refcnt, 1);
1887
1888         return rt;
1889 }
1890
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every route */
	struct net *net;	/* namespace whose ip6_null_entry is never matched */
};
1895
1896 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1897 {
1898         struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1899         struct net *net = ((struct arg_dev_net *)arg)->net;
1900
1901         if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1902             rt != net->ipv6.ip6_null_entry) {
1903                 RT6_TRACE("deleted by ifdown %p\n", rt);
1904                 return -1;
1905         }
1906         return 0;
1907 }
1908
1909 void rt6_ifdown(struct net *net, struct net_device *dev)
1910 {
1911         struct arg_dev_net adn = {
1912                 .dev = dev,
1913                 .net = net,
1914         };
1915
1916         fib6_clean_all(net, fib6_ifdown, 0, &adn);
1917 }
1918
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU is being changed */
	unsigned mtu;		/* new MTU to propagate to matching routes */
};
1924
1925 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1926 {
1927         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1928         struct inet6_dev *idev;
1929         struct net *net = arg->dev->nd_net;
1930
1931         /* In IPv6 pmtu discovery is not optional,
1932            so that RTAX_MTU lock cannot disable it.
1933            We still use this lock to block changes
1934            caused by addrconf/ndisc.
1935         */
1936
1937         idev = __in6_dev_get(arg->dev);
1938         if (idev == NULL)
1939                 return 0;
1940
1941         /* For administrative MTU increase, there is no way to discover
1942            IPv6 PMTU increase, so PMTU increase should be updated here.
1943            Since RFC 1981 doesn't include administrative MTU increase
1944            update PMTU increase is a MUST. (i.e. jumbo frame)
1945          */
1946         /*
1947            If new MTU is less than route PMTU, this new MTU will be the
1948            lowest MTU in the path, update the route PMTU to reflect PMTU
1949            decreases; if new MTU is greater than route PMTU, and the
1950            old MTU is the lowest MTU in the path, update the route PMTU
1951            to reflect the increase. In this case if the other nodes' MTU
1952            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1953            PMTU discouvery.
1954          */
1955         if (rt->rt6i_dev == arg->dev &&
1956             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1957             (dst_mtu(&rt->u.dst) >= arg->mtu ||
1958              (dst_mtu(&rt->u.dst) < arg->mtu &&
1959               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1960                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1961                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1962         }
1963         return 0;
1964 }
1965
1966 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1967 {
1968         struct rt6_mtu_change_arg arg = {
1969                 .dev = dev,
1970                 .mtu = mtu,
1971         };
1972
1973         fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1974 }
1975
1976 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1977         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
1978         [RTA_OIF]               = { .type = NLA_U32 },
1979         [RTA_IIF]               = { .type = NLA_U32 },
1980         [RTA_PRIORITY]          = { .type = NLA_U32 },
1981         [RTA_METRICS]           = { .type = NLA_NESTED },
1982 };
1983
1984 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1985                               struct fib6_config *cfg)
1986 {
1987         struct rtmsg *rtm;
1988         struct nlattr *tb[RTA_MAX+1];
1989         int err;
1990
1991         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1992         if (err < 0)
1993                 goto errout;
1994
1995         err = -EINVAL;
1996         rtm = nlmsg_data(nlh);
1997         memset(cfg, 0, sizeof(*cfg));
1998
1999         cfg->fc_table = rtm->rtm_table;
2000         cfg->fc_dst_len = rtm->rtm_dst_len;
2001         cfg->fc_src_len = rtm->rtm_src_len;
2002         cfg->fc_flags = RTF_UP;
2003         cfg->fc_protocol = rtm->rtm_protocol;
2004
2005         if (rtm->rtm_type == RTN_UNREACHABLE)
2006                 cfg->fc_flags |= RTF_REJECT;
2007
2008         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2009         cfg->fc_nlinfo.nlh = nlh;
2010         cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
2011
2012         if (tb[RTA_GATEWAY]) {
2013                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2014                 cfg->fc_flags |= RTF_GATEWAY;
2015         }
2016
2017         if (tb[RTA_DST]) {
2018                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2019
2020                 if (nla_len(tb[RTA_DST]) < plen)
2021                         goto errout;
2022
2023                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2024         }
2025
2026         if (tb[RTA_SRC]) {
2027                 int plen = (rtm->rtm_src_len + 7) >> 3;
2028
2029                 if (nla_len(tb[RTA_SRC]) < plen)
2030                         goto errout;
2031
2032                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2033         }
2034
2035         if (tb[RTA_OIF])
2036                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2037
2038         if (tb[RTA_PRIORITY])
2039                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2040
2041         if (tb[RTA_METRICS]) {
2042                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2043                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2044         }
2045
2046         if (tb[RTA_TABLE])
2047                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2048
2049         err = 0;
2050 errout:
2051         return err;
2052 }
2053
2054 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2055 {
2056         struct fib6_config cfg;
2057         int err;
2058
2059         err = rtm_to_fib6_config(skb, nlh, &cfg);
2060         if (err < 0)
2061                 return err;
2062
2063         return ip6_route_del(&cfg);
2064 }
2065
2066 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2067 {
2068         struct fib6_config cfg;
2069         int err;
2070
2071         err = rtm_to_fib6_config(skb, nlh, &cfg);
2072         if (err < 0)
2073                 return err;
2074
2075         return ip6_route_add(&cfg);
2076 }
2077
2078 static inline size_t rt6_nlmsg_size(void)
2079 {
2080         return NLMSG_ALIGN(sizeof(struct rtmsg))
2081                + nla_total_size(16) /* RTA_SRC */
2082                + nla_total_size(16) /* RTA_DST */
2083                + nla_total_size(16) /* RTA_GATEWAY */
2084                + nla_total_size(16) /* RTA_PREFSRC */
2085                + nla_total_size(4) /* RTA_TABLE */
2086                + nla_total_size(4) /* RTA_IIF */
2087                + nla_total_size(4) /* RTA_OIF */
2088                + nla_total_size(4) /* RTA_PRIORITY */
2089                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2090                + nla_total_size(sizeof(struct rta_cacheinfo));
2091 }
2092
2093 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2094                          struct in6_addr *dst, struct in6_addr *src,
2095                          int iif, int type, u32 pid, u32 seq,
2096                          int prefix, unsigned int flags)
2097 {
2098         struct rtmsg *rtm;
2099         struct nlmsghdr *nlh;
2100         long expires;
2101         u32 table;
2102
2103         if (prefix) {   /* user wants prefix routes only */
2104                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2105                         /* success since this is not a prefix route */
2106                         return 1;
2107                 }
2108         }
2109
2110         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2111         if (nlh == NULL)
2112                 return -EMSGSIZE;
2113
2114         rtm = nlmsg_data(nlh);
2115         rtm->rtm_family = AF_INET6;
2116         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2117         rtm->rtm_src_len = rt->rt6i_src.plen;
2118         rtm->rtm_tos = 0;
2119         if (rt->rt6i_table)
2120                 table = rt->rt6i_table->tb6_id;
2121         else
2122                 table = RT6_TABLE_UNSPEC;
2123         rtm->rtm_table = table;
2124         NLA_PUT_U32(skb, RTA_TABLE, table);
2125         if (rt->rt6i_flags&RTF_REJECT)
2126                 rtm->rtm_type = RTN_UNREACHABLE;
2127         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2128                 rtm->rtm_type = RTN_LOCAL;
2129         else
2130                 rtm->rtm_type = RTN_UNICAST;
2131         rtm->rtm_flags = 0;
2132         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2133         rtm->rtm_protocol = rt->rt6i_protocol;
2134         if (rt->rt6i_flags&RTF_DYNAMIC)
2135                 rtm->rtm_protocol = RTPROT_REDIRECT;
2136         else if (rt->rt6i_flags & RTF_ADDRCONF)
2137                 rtm->rtm_protocol = RTPROT_KERNEL;
2138         else if (rt->rt6i_flags&RTF_DEFAULT)
2139                 rtm->rtm_protocol = RTPROT_RA;
2140
2141         if (rt->rt6i_flags&RTF_CACHE)
2142                 rtm->rtm_flags |= RTM_F_CLONED;
2143
2144         if (dst) {
2145                 NLA_PUT(skb, RTA_DST, 16, dst);
2146                 rtm->rtm_dst_len = 128;
2147         } else if (rtm->rtm_dst_len)
2148                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2149 #ifdef CONFIG_IPV6_SUBTREES
2150         if (src) {
2151                 NLA_PUT(skb, RTA_SRC, 16, src);
2152                 rtm->rtm_src_len = 128;
2153         } else if (rtm->rtm_src_len)
2154                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2155 #endif
2156         if (iif)
2157                 NLA_PUT_U32(skb, RTA_IIF, iif);
2158         else if (dst) {
2159                 struct in6_addr saddr_buf;
2160                 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2161                                        dst, &saddr_buf) == 0)
2162                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2163         }
2164
2165         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2166                 goto nla_put_failure;
2167
2168         if (rt->u.dst.neighbour)
2169                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2170
2171         if (rt->u.dst.dev)
2172                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2173
2174         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2175
2176         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2177         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2178                                expires, rt->u.dst.error) < 0)
2179                 goto nla_put_failure;
2180
2181         return nlmsg_end(skb, nlh);
2182
2183 nla_put_failure:
2184         nlmsg_cancel(skb, nlh);
2185         return -EMSGSIZE;
2186 }
2187
2188 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2189 {
2190         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2191         int prefix;
2192
2193         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2194                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2195                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2196         } else
2197                 prefix = 0;
2198
2199         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2200                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2201                      prefix, NLM_F_MULTI);
2202 }
2203
2204 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2205 {
2206         struct net *net = in_skb->sk->sk_net;
2207         struct nlattr *tb[RTA_MAX+1];
2208         struct rt6_info *rt;
2209         struct sk_buff *skb;
2210         struct rtmsg *rtm;
2211         struct flowi fl;
2212         int err, iif = 0;
2213
2214         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2215         if (err < 0)
2216                 goto errout;
2217
2218         err = -EINVAL;
2219         memset(&fl, 0, sizeof(fl));
2220
2221         if (tb[RTA_SRC]) {
2222                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2223                         goto errout;
2224
2225                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2226         }
2227
2228         if (tb[RTA_DST]) {
2229                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2230                         goto errout;
2231
2232                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2233         }
2234
2235         if (tb[RTA_IIF])
2236                 iif = nla_get_u32(tb[RTA_IIF]);
2237
2238         if (tb[RTA_OIF])
2239                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2240
2241         if (iif) {
2242                 struct net_device *dev;
2243                 dev = __dev_get_by_index(net, iif);
2244                 if (!dev) {
2245                         err = -ENODEV;
2246                         goto errout;
2247                 }
2248         }
2249
2250         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2251         if (skb == NULL) {
2252                 err = -ENOBUFS;
2253                 goto errout;
2254         }
2255
2256         /* Reserve room for dummy headers, this skb can pass
2257            through good chunk of routing engine.
2258          */
2259         skb_reset_mac_header(skb);
2260         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2261
2262         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2263         skb->dst = &rt->u.dst;
2264
2265         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2266                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2267                             nlh->nlmsg_seq, 0, 0);
2268         if (err < 0) {
2269                 kfree_skb(skb);
2270                 goto errout;
2271         }
2272
2273         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2274 errout:
2275         return err;
2276 }
2277
2278 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2279 {
2280         struct sk_buff *skb;
2281         struct net *net = info->nl_net;
2282         u32 seq;
2283         int err;
2284
2285         err = -ENOBUFS;
2286         seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2287
2288         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2289         if (skb == NULL)
2290                 goto errout;
2291
2292         err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2293                                 event, info->pid, seq, 0, 0);
2294         if (err < 0) {
2295                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2296                 WARN_ON(err == -EMSGSIZE);
2297                 kfree_skb(skb);
2298                 goto errout;
2299         }
2300         err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2301                           info->nlh, gfp_any());
2302 errout:
2303         if (err < 0)
2304                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2305 }
2306
2307 static int ip6_route_dev_notify(struct notifier_block *this,
2308                                 unsigned long event, void *data)
2309 {
2310         struct net_device *dev = (struct net_device *)data;
2311         struct net *net = dev->nd_net;
2312
2313         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2314                 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2315                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2316 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2317                 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2318                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2319                 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2320                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2321 #endif
2322         }
2323
2324         return NOTIFY_OK;
2325 }
2326
2327 /*
2328  *      /proc
2329  */
2330
2331 #ifdef CONFIG_PROC_FS
2332
2333 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2334
/*
 * NOTE(review): no user of this structure is visible in this part of the
 * file (the /proc handlers below use seq_file); presumably a leftover from
 * an older buffer-based /proc implementation -- confirm before removing.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2343
2344 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2345 {
2346         struct seq_file *m = p_arg;
2347
2348         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2349                    rt->rt6i_dst.plen);
2350
2351 #ifdef CONFIG_IPV6_SUBTREES
2352         seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2353                    rt->rt6i_src.plen);
2354 #else
2355         seq_puts(m, "00000000000000000000000000000000 00 ");
2356 #endif
2357
2358         if (rt->rt6i_nexthop) {
2359                 seq_printf(m, NIP6_SEQFMT,
2360                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2361         } else {
2362                 seq_puts(m, "00000000000000000000000000000000");
2363         }
2364         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2365                    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2366                    rt->u.dst.__use, rt->rt6i_flags,
2367                    rt->rt6i_dev ? rt->rt6i_dev->name : "");
2368         return 0;
2369 }
2370
2371 static int ipv6_route_show(struct seq_file *m, void *v)
2372 {
2373         struct net *net = (struct net *)m->private;
2374         fib6_clean_all(net, rt6_info_route, 0, m);
2375         return 0;
2376 }
2377
2378 static int ipv6_route_open(struct inode *inode, struct file *file)
2379 {
2380         struct net *net = get_proc_net(inode);
2381         if (!net)
2382                 return -ENXIO;
2383         return single_open(file, ipv6_route_show, net);
2384 }
2385
2386 static int ipv6_route_release(struct inode *inode, struct file *file)
2387 {
2388         struct seq_file *seq = file->private_data;
2389         struct net *net = seq->private;
2390         put_net(net);
2391         return single_release(inode, file);
2392 }
2393
2394 static const struct file_operations ipv6_route_proc_fops = {
2395         .owner          = THIS_MODULE,
2396         .open           = ipv6_route_open,
2397         .read           = seq_read,
2398         .llseek         = seq_lseek,
2399         .release        = ipv6_route_release,
2400 };
2401
2402 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2403 {
2404         struct net *net = (struct net *)seq->private;
2405         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2406                    net->ipv6.rt6_stats->fib_nodes,
2407                    net->ipv6.rt6_stats->fib_route_nodes,
2408                    net->ipv6.rt6_stats->fib_rt_alloc,
2409                    net->ipv6.rt6_stats->fib_rt_entries,
2410                    net->ipv6.rt6_stats->fib_rt_cache,
2411                    atomic_read(&ip6_dst_ops.entries),
2412                    net->ipv6.rt6_stats->fib_discarded_routes);
2413
2414         return 0;
2415 }
2416
2417 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2418 {
2419         struct net *net = get_proc_net(inode);
2420         return single_open(file, rt6_stats_seq_show, net);
2421 }
2422
2423 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2424 {
2425         struct seq_file *seq = file->private_data;
2426         struct net *net = (struct net *)seq->private;
2427         put_net(net);
2428         return single_release(inode, file);
2429 }
2430
2431 static const struct file_operations rt6_stats_seq_fops = {
2432         .owner   = THIS_MODULE,
2433         .open    = rt6_stats_seq_open,
2434         .read    = seq_read,
2435         .llseek  = seq_lseek,
2436         .release = rt6_stats_seq_release,
2437 };
2438 #endif  /* CONFIG_PROC_FS */
2439
2440 #ifdef CONFIG_SYSCTL
2441
2442 static
2443 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2444                               void __user *buffer, size_t *lenp, loff_t *ppos)
2445 {
2446         struct net *net = current->nsproxy->net_ns;
2447         int delay = net->ipv6.sysctl.flush_delay;
2448         if (write) {
2449                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2450                 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2451                 return 0;
2452         } else
2453                 return -EINVAL;
2454 }
2455
2456 ctl_table ipv6_route_table_template[] = {
2457         {
2458                 .procname       =       "flush",
2459                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2460                 .maxlen         =       sizeof(int),
2461                 .mode           =       0200,
2462                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2463         },
2464         {
2465                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2466                 .procname       =       "gc_thresh",
2467                 .data           =       &ip6_dst_ops.gc_thresh,
2468                 .maxlen         =       sizeof(int),
2469                 .mode           =       0644,
2470                 .proc_handler   =       &proc_dointvec,
2471         },
2472         {
2473                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2474                 .procname       =       "max_size",
2475                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2476                 .maxlen         =       sizeof(int),
2477                 .mode           =       0644,
2478                 .proc_handler   =       &proc_dointvec,
2479         },
2480         {
2481                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2482                 .procname       =       "gc_min_interval",
2483                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2484                 .maxlen         =       sizeof(int),
2485                 .mode           =       0644,
2486                 .proc_handler   =       &proc_dointvec_jiffies,
2487                 .strategy       =       &sysctl_jiffies,
2488         },
2489         {
2490                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2491                 .procname       =       "gc_timeout",
2492                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2493                 .maxlen         =       sizeof(int),
2494                 .mode           =       0644,
2495                 .proc_handler   =       &proc_dointvec_jiffies,
2496                 .strategy       =       &sysctl_jiffies,
2497         },
2498         {
2499                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2500                 .procname       =       "gc_interval",
2501                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2502                 .maxlen         =       sizeof(int),
2503                 .mode           =       0644,
2504                 .proc_handler   =       &proc_dointvec_jiffies,
2505                 .strategy       =       &sysctl_jiffies,
2506         },
2507         {
2508                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2509                 .procname       =       "gc_elasticity",
2510                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2511                 .maxlen         =       sizeof(int),
2512                 .mode           =       0644,
2513                 .proc_handler   =       &proc_dointvec_jiffies,
2514                 .strategy       =       &sysctl_jiffies,
2515         },
2516         {
2517                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2518                 .procname       =       "mtu_expires",
2519                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2520                 .maxlen         =       sizeof(int),
2521                 .mode           =       0644,
2522                 .proc_handler   =       &proc_dointvec_jiffies,
2523                 .strategy       =       &sysctl_jiffies,
2524         },
2525         {
2526                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2527                 .procname       =       "min_adv_mss",
2528                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2529                 .maxlen         =       sizeof(int),
2530                 .mode           =       0644,
2531                 .proc_handler   =       &proc_dointvec_jiffies,
2532                 .strategy       =       &sysctl_jiffies,
2533         },
2534         {
2535                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2536                 .procname       =       "gc_min_interval_ms",
2537                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2538                 .maxlen         =       sizeof(int),
2539                 .mode           =       0644,
2540                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2541                 .strategy       =       &sysctl_ms_jiffies,
2542         },
2543         { .ctl_name = 0 }
2544 };
2545
2546 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2547 {
2548         struct ctl_table *table;
2549
2550         table = kmemdup(ipv6_route_table_template,
2551                         sizeof(ipv6_route_table_template),
2552                         GFP_KERNEL);
2553
2554         if (table) {
2555                 table[0].data = &net->ipv6.sysctl.flush_delay;
2556                 /* table[1].data will be handled when we have
2557                    routes per namespace */
2558                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2559                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2560                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2561                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2562                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2563                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2564                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2565         }
2566
2567         return table;
2568 }
2569 #endif
2570
2571 static int ip6_route_net_init(struct net *net)
2572 {
2573         int ret = 0;
2574
2575         ret = -ENOMEM;
2576         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2577                                            sizeof(*net->ipv6.ip6_null_entry),
2578                                            GFP_KERNEL);
2579         if (!net->ipv6.ip6_null_entry)
2580                 goto out;
2581         net->ipv6.ip6_null_entry->u.dst.path =
2582                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2583
2584 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2585         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2586                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2587                                                GFP_KERNEL);
2588         if (!net->ipv6.ip6_prohibit_entry) {
2589                 kfree(net->ipv6.ip6_null_entry);
2590                 goto out;
2591         }
2592         net->ipv6.ip6_prohibit_entry->u.dst.path =
2593                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2594
2595         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2596                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2597                                                GFP_KERNEL);
2598         if (!net->ipv6.ip6_blk_hole_entry) {
2599                 kfree(net->ipv6.ip6_null_entry);
2600                 kfree(net->ipv6.ip6_prohibit_entry);
2601                 goto out;
2602         }
2603         net->ipv6.ip6_blk_hole_entry->u.dst.path =
2604                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2605 #endif
2606
2607 #ifdef CONFIG_PROC_FS
2608         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2609         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2610 #endif
2611         ret = 0;
2612 out:
2613         return ret;
2614 }
2615
2616 static void ip6_route_net_exit(struct net *net)
2617 {
2618 #ifdef CONFIG_PROC_FS
2619         proc_net_remove(net, "ipv6_route");
2620         proc_net_remove(net, "rt6_stats");
2621 #endif
2622         kfree(net->ipv6.ip6_null_entry);
2623 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624         kfree(net->ipv6.ip6_prohibit_entry);
2625         kfree(net->ipv6.ip6_blk_hole_entry);
2626 #endif
2627 }
2628
2629 static struct pernet_operations ip6_route_net_ops = {
2630         .init = ip6_route_net_init,
2631         .exit = ip6_route_net_exit,
2632 };
2633
2634 static struct notifier_block ip6_route_dev_notifier = {
2635         .notifier_call = ip6_route_dev_notify,
2636         .priority = 0,
2637 };
2638
2639 int __init ip6_route_init(void)
2640 {
2641         int ret;
2642
2643         ip6_dst_ops.kmem_cachep =
2644                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2645                                   SLAB_HWCACHE_ALIGN, NULL);
2646         if (!ip6_dst_ops.kmem_cachep)
2647                 return -ENOMEM;
2648
2649         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2650
2651         ret = register_pernet_subsys(&ip6_route_net_ops);
2652         if (ret)
2653                 goto out_kmem_cache;
2654
2655         /* Registering of the loopback is done before this portion of code,
2656          * the loopback reference in rt6_info will not be taken, do it
2657          * manually for init_net */
2658         init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2659         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2660   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2661         init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2662         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2663         init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2664         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2665   #endif
2666         ret = fib6_init();
2667         if (ret)
2668                 goto out_register_subsys;
2669
2670         ret = xfrm6_init();
2671         if (ret)
2672                 goto out_fib6_init;
2673
2674         ret = fib6_rules_init();
2675         if (ret)
2676                 goto xfrm6_init;
2677
2678         ret = -ENOBUFS;
2679         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2680             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2681             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2682                 goto fib6_rules_init;
2683
2684         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2685         if (ret)
2686                 goto fib6_rules_init;
2687
2688 out:
2689         return ret;
2690
2691 fib6_rules_init:
2692         fib6_rules_cleanup();
2693 xfrm6_init:
2694         xfrm6_fini();
2695 out_fib6_init:
2696         fib6_gc_cleanup();
2697 out_register_subsys:
2698         unregister_pernet_subsys(&ip6_route_net_ops);
2699 out_kmem_cache:
2700         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2701         goto out;
2702 }
2703
2704 void ip6_route_cleanup(void)
2705 {
2706         unregister_netdevice_notifier(&ip6_route_dev_notifier);
2707         fib6_rules_cleanup();
2708         xfrm6_fini();
2709         fib6_gc_cleanup();
2710         unregister_pernet_subsys(&ip6_route_net_ops);
2711         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2712 }