[IPV6]: ROUTE: Eliminate lock for default route pointer.
[safe/jmp/linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk() calls, otherwise they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the route lookup paths clone routes that already have a
 * next hop (or are RTF_NONEXTHOP) with rt6_alloc_clone(); when zero such
 * routes are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Flag bits for the "strict" argument of rt6_select()/rt6_score_route(). */
#define RT6_SELECT_F_IFACE	0x1	/* reject routes that do not match oif */
#define RT6_SELECT_F_REACHABLE	0x2	/* reject routes whose next hop is not NUD_VALID */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254         if (rt6_check_neigh(rt))
255                 m |= 4;
256         else if (strict & RT6_SELECT_F_REACHABLE)
257                 return -1;
258         return m;
259 }
260
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262                                    int strict)
263 {
264         struct rt6_info *match = NULL, *last = NULL;
265         struct rt6_info *rt, *rt0 = *head;
266         u32 metric;
267         int mpri = -1;
268
269         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270                   __FUNCTION__, head, head ? *head : NULL, oif);
271
272         for (rt = rt0, metric = rt0->rt6i_metric;
273              rt && rt->rt6i_metric == metric;
274              rt = rt->u.next) {
275                 int m;
276
277                 if (rt6_check_expired(rt))
278                         continue;
279
280                 last = rt;
281
282                 m = rt6_score_route(rt, oif, strict);
283                 if (m < 0)
284                         continue;
285
286                 if (m > mpri) {
287                         match = rt;
288                         mpri = m;
289                 }
290         }
291
292         if (!match &&
293             (strict & RT6_SELECT_F_REACHABLE) &&
294             last && last != rt0) {
295                 /* no entries matched; do round-robin */
296                 *head = rt0->u.next;
297                 rt0->u.next = last->u.next;
298                 last->u.next = rt0;
299         }
300
301         RT6_TRACE("%s() => %p, score=%d\n",
302                   __FUNCTION__, match, mpri);
303
304         return (match ? match : &ip6_null_entry);
305 }
306
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308                             int oif, int strict)
309 {
310         struct fib6_node *fn;
311         struct rt6_info *rt;
312
313         read_lock_bh(&rt6_lock);
314         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315         rt = rt6_device_match(fn->leaf, oif, strict);
316         dst_hold(&rt->u.dst);
317         rt->u.dst.__use++;
318         read_unlock_bh(&rt6_lock);
319
320         rt->u.dst.lastuse = jiffies;
321         if (rt->u.dst.error == 0)
322                 return rt;
323         dst_release(&rt->u.dst);
324         return NULL;
325 }
326
327 /* ip6_ins_rt is called with FREE rt6_lock.
328    It takes new route entry, the addition fails by any reason the
329    route is freed. In any case, if caller does not hold it, it may
330    be destroyed.
331  */
332
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334                 void *_rtattr, struct netlink_skb_parms *req)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346                                       struct in6_addr *saddr)
347 {
348         struct rt6_info *rt;
349
350         /*
351          *      Clone the route.
352          */
353
354         rt = ip6_rt_copy(ort);
355
356         if (rt) {
357                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358                         if (rt->rt6i_dst.plen != 128 &&
359                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360                                 rt->rt6i_flags |= RTF_ANYCAST;
361                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
362                 }
363
364                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365                 rt->rt6i_dst.plen = 128;
366                 rt->rt6i_flags |= RTF_CACHE;
367                 rt->u.dst.flags |= DST_HOST;
368
369 #ifdef CONFIG_IPV6_SUBTREES
370                 if (rt->rt6i_src.plen && saddr) {
371                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372                         rt->rt6i_src.plen = 128;
373                 }
374 #endif
375
376                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
378         }
379
380         return rt;
381 }
382
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384 {
385         struct rt6_info *rt = ip6_rt_copy(ort);
386         if (rt) {
387                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388                 rt->rt6i_dst.plen = 128;
389                 rt->rt6i_flags |= RTF_CACHE;
390                 if (rt->rt6i_flags & RTF_REJECT)
391                         rt->u.dst.error = ort->u.dst.error;
392                 rt->u.dst.flags |= DST_HOST;
393                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394         }
395         return rt;
396 }
397
/*
 * Retry a strict lookup further up the FIB tree.
 *
 * Must be expanded where the locals `rt`, `fn` and `strict` and the labels
 * `out` and `restart` exist.  When the match so far is the null entry and
 * the lookup is strict, walk the parent chain: reaching a root node jumps
 * to `out`, reaching a node that carries route info jumps to `restart`.
 * If the chain ends first, execution falls through with fn == NULL.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) \
			goto out; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
408
409
410 void ip6_route_input(struct sk_buff *skb)
411 {
412         struct fib6_node *fn;
413         struct rt6_info *rt, *nrt;
414         int strict;
415         int attempts = 3;
416         int err;
417
418         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
419
420 relookup:
421         read_lock_bh(&rt6_lock);
422
423         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424                          &skb->nh.ipv6h->saddr);
425
426 restart:
427         rt = fn->leaf;
428
429         if ((rt->rt6i_flags & RTF_CACHE)) {
430                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
431                 BACKTRACK();
432                 goto out;
433         }
434
435         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
436         BACKTRACK();
437
438         dst_hold(&rt->u.dst);
439         read_unlock_bh(&rt6_lock);
440
441         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
442                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
443         else {
444 #if CLONE_OFFLINK_ROUTE
445                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
446 #else
447                 goto out2;
448 #endif
449         }
450
451         dst_release(&rt->u.dst);
452         rt = nrt ? : &ip6_null_entry;
453
454         dst_hold(&rt->u.dst);
455         if (nrt) {
456                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
457                 if (!err)
458                         goto out2;
459         }
460
461         if (--attempts <= 0)
462                 goto out2;
463
464         /*
465          * Race condition! In the gap, when rt6_lock was
466          * released someone could insert this route.  Relookup.
467          */
468         dst_release(&rt->u.dst);
469         goto relookup;
470
471 out:
472         dst_hold(&rt->u.dst);
473         read_unlock_bh(&rt6_lock);
474 out2:
475         rt->u.dst.lastuse = jiffies;
476         rt->u.dst.__use++;
477         skb->dst = (struct dst_entry *) rt;
478         return;
479 }
480
481 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
482 {
483         struct fib6_node *fn;
484         struct rt6_info *rt, *nrt;
485         int strict;
486         int attempts = 3;
487         int err;
488
489         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
490
491 relookup:
492         read_lock_bh(&rt6_lock);
493
494         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
495
496 restart:
497         rt = fn->leaf;
498
499         if ((rt->rt6i_flags & RTF_CACHE)) {
500                 rt = rt6_device_match(rt, fl->oif, strict);
501                 BACKTRACK();
502                 goto out;
503         }
504         if (rt->rt6i_flags & RTF_DEFAULT) {
505                 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
506                 if (rt == &ip6_null_entry)
507                         rt = rt6_select(&fn->leaf, fl->oif, strict);
508         } else {
509                 rt = rt6_device_match(rt, fl->oif, strict);
510                 BACKTRACK();
511         }
512
513         dst_hold(&rt->u.dst);
514         read_unlock_bh(&rt6_lock);
515
516         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
517                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
518         else {
519 #if CLONE_OFFLINK_ROUTE
520                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
521 #else
522                 goto out2;
523 #endif
524         }
525
526         dst_release(&rt->u.dst);
527         rt = nrt ? : &ip6_null_entry;
528
529         dst_hold(&rt->u.dst);
530         if (nrt) {
531                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
532                 if (!err)
533                         goto out2;
534         }
535
536         if (--attempts <= 0)
537                 goto out2;
538
539         /*
540          * Race condition! In the gap, when rt6_lock was
541          * released someone could insert this route.  Relookup.
542          */
543         dst_release(&rt->u.dst);
544         goto relookup;
545
546 out:
547         dst_hold(&rt->u.dst);
548         read_unlock_bh(&rt6_lock);
549 out2:
550         rt->u.dst.lastuse = jiffies;
551         rt->u.dst.__use++;
552         return &rt->u.dst;
553 }
554
555
556 /*
557  *      Destination cache support functions
558  */
559
560 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
561 {
562         struct rt6_info *rt;
563
564         rt = (struct rt6_info *) dst;
565
566         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
567                 return dst;
568
569         return NULL;
570 }
571
572 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
573 {
574         struct rt6_info *rt = (struct rt6_info *) dst;
575
576         if (rt) {
577                 if (rt->rt6i_flags & RTF_CACHE)
578                         ip6_del_rt(rt, NULL, NULL, NULL);
579                 else
580                         dst_release(dst);
581         }
582         return NULL;
583 }
584
585 static void ip6_link_failure(struct sk_buff *skb)
586 {
587         struct rt6_info *rt;
588
589         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
590
591         rt = (struct rt6_info *) skb->dst;
592         if (rt) {
593                 if (rt->rt6i_flags&RTF_CACHE) {
594                         dst_set_expires(&rt->u.dst, 0);
595                         rt->rt6i_flags |= RTF_EXPIRES;
596                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
597                         rt->rt6i_node->fn_sernum = -1;
598         }
599 }
600
601 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
602 {
603         struct rt6_info *rt6 = (struct rt6_info*)dst;
604
605         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
606                 rt6->rt6i_flags |= RTF_MODIFIED;
607                 if (mtu < IPV6_MIN_MTU) {
608                         mtu = IPV6_MIN_MTU;
609                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
610                 }
611                 dst->metrics[RTAX_MTU-1] = mtu;
612         }
613 }
614
static int ipv6_get_mtu(struct net_device *dev);

/*
 * Head of the list (linked through dst->next) of entries created by
 * ndisc_dst_alloc() and reaped by ndisc_dst_gc().
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
618
619 static inline unsigned int ipv6_advmss(unsigned int mtu)
620 {
621         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
622
623         if (mtu < ip6_rt_min_advmss)
624                 mtu = ip6_rt_min_advmss;
625
626         /*
627          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
628          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
629          * IPV6_MAXPLEN is also valid and means: "any MSS, 
630          * rely only on pmtu discovery"
631          */
632         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
633                 mtu = IPV6_MAXPLEN;
634         return mtu;
635 }
636
637 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
638                                   struct neighbour *neigh,
639                                   struct in6_addr *addr,
640                                   int (*output)(struct sk_buff *))
641 {
642         struct rt6_info *rt;
643         struct inet6_dev *idev = in6_dev_get(dev);
644
645         if (unlikely(idev == NULL))
646                 return NULL;
647
648         rt = ip6_dst_alloc();
649         if (unlikely(rt == NULL)) {
650                 in6_dev_put(idev);
651                 goto out;
652         }
653
654         dev_hold(dev);
655         if (neigh)
656                 neigh_hold(neigh);
657         else
658                 neigh = ndisc_get_neigh(dev, addr);
659
660         rt->rt6i_dev      = dev;
661         rt->rt6i_idev     = idev;
662         rt->rt6i_nexthop  = neigh;
663         atomic_set(&rt->u.dst.__refcnt, 1);
664         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
665         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
666         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
667         rt->u.dst.output  = output;
668
669 #if 0   /* there's no chance to use these for ndisc */
670         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
671                                 ? DST_HOST 
672                                 : 0;
673         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
674         rt->rt6i_dst.plen = 128;
675 #endif
676
677         write_lock_bh(&rt6_lock);
678         rt->u.dst.next = ndisc_dst_gc_list;
679         ndisc_dst_gc_list = &rt->u.dst;
680         write_unlock_bh(&rt6_lock);
681
682         fib6_force_start_gc();
683
684 out:
685         return (struct dst_entry *)rt;
686 }
687
688 int ndisc_dst_gc(int *more)
689 {
690         struct dst_entry *dst, *next, **pprev;
691         int freed;
692
693         next = NULL;
694         pprev = &ndisc_dst_gc_list;
695         freed = 0;
696         while ((dst = *pprev) != NULL) {
697                 if (!atomic_read(&dst->__refcnt)) {
698                         *pprev = dst->next;
699                         dst_free(dst);
700                         freed++;
701                 } else {
702                         pprev = &dst->next;
703                         (*more)++;
704                 }
705         }
706
707         return freed;
708 }
709
710 static int ip6_dst_gc(void)
711 {
712         static unsigned expire = 30*HZ;
713         static unsigned long last_gc;
714         unsigned long now = jiffies;
715
716         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
717             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
718                 goto out;
719
720         expire++;
721         fib6_run_gc(expire);
722         last_gc = now;
723         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
724                 expire = ip6_rt_gc_timeout>>1;
725
726 out:
727         expire -= expire>>ip6_rt_gc_elasticity;
728         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
729 }
730
731 /* Clean host part of a prefix. Not necessary in radix tree,
732    but results in cleaner routing tables.
733
734    Remove it only when all the things will work!
735  */
736
737 static int ipv6_get_mtu(struct net_device *dev)
738 {
739         int mtu = IPV6_MIN_MTU;
740         struct inet6_dev *idev;
741
742         idev = in6_dev_get(dev);
743         if (idev) {
744                 mtu = idev->cnf.mtu6;
745                 in6_dev_put(idev);
746         }
747         return mtu;
748 }
749
750 int ipv6_get_hoplimit(struct net_device *dev)
751 {
752         int hoplimit = ipv6_devconf.hop_limit;
753         struct inet6_dev *idev;
754
755         idev = in6_dev_get(dev);
756         if (idev) {
757                 hoplimit = idev->cnf.hop_limit;
758                 in6_dev_put(idev);
759         }
760         return hoplimit;
761 }
762
763 /*
764  *
765  */
766
767 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
768                 void *_rtattr, struct netlink_skb_parms *req)
769 {
770         int err;
771         struct rtmsg *r;
772         struct rtattr **rta;
773         struct rt6_info *rt = NULL;
774         struct net_device *dev = NULL;
775         struct inet6_dev *idev = NULL;
776         int addr_type;
777
778         rta = (struct rtattr **) _rtattr;
779
780         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
781                 return -EINVAL;
782 #ifndef CONFIG_IPV6_SUBTREES
783         if (rtmsg->rtmsg_src_len)
784                 return -EINVAL;
785 #endif
786         if (rtmsg->rtmsg_ifindex) {
787                 err = -ENODEV;
788                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
789                 if (!dev)
790                         goto out;
791                 idev = in6_dev_get(dev);
792                 if (!idev)
793                         goto out;
794         }
795
796         if (rtmsg->rtmsg_metric == 0)
797                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
798
799         rt = ip6_dst_alloc();
800
801         if (rt == NULL) {
802                 err = -ENOMEM;
803                 goto out;
804         }
805
806         rt->u.dst.obsolete = -1;
807         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
808         if (nlh && (r = NLMSG_DATA(nlh))) {
809                 rt->rt6i_protocol = r->rtm_protocol;
810         } else {
811                 rt->rt6i_protocol = RTPROT_BOOT;
812         }
813
814         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
815
816         if (addr_type & IPV6_ADDR_MULTICAST)
817                 rt->u.dst.input = ip6_mc_input;
818         else
819                 rt->u.dst.input = ip6_forward;
820
821         rt->u.dst.output = ip6_output;
822
823         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
824                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
825         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
826         if (rt->rt6i_dst.plen == 128)
827                rt->u.dst.flags = DST_HOST;
828
829 #ifdef CONFIG_IPV6_SUBTREES
830         ipv6_addr_prefix(&rt->rt6i_src.addr, 
831                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
832         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
833 #endif
834
835         rt->rt6i_metric = rtmsg->rtmsg_metric;
836
837         /* We cannot add true routes via loopback here,
838            they would result in kernel looping; promote them to reject routes
839          */
840         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
841             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
842                 /* hold loopback dev/idev if we haven't done so. */
843                 if (dev != &loopback_dev) {
844                         if (dev) {
845                                 dev_put(dev);
846                                 in6_dev_put(idev);
847                         }
848                         dev = &loopback_dev;
849                         dev_hold(dev);
850                         idev = in6_dev_get(dev);
851                         if (!idev) {
852                                 err = -ENODEV;
853                                 goto out;
854                         }
855                 }
856                 rt->u.dst.output = ip6_pkt_discard_out;
857                 rt->u.dst.input = ip6_pkt_discard;
858                 rt->u.dst.error = -ENETUNREACH;
859                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
860                 goto install_route;
861         }
862
863         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
864                 struct in6_addr *gw_addr;
865                 int gwa_type;
866
867                 gw_addr = &rtmsg->rtmsg_gateway;
868                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
869                 gwa_type = ipv6_addr_type(gw_addr);
870
871                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
872                         struct rt6_info *grt;
873
874                         /* IPv6 strictly inhibits using not link-local
875                            addresses as nexthop address.
876                            Otherwise, router will not able to send redirects.
877                            It is very good, but in some (rare!) circumstances
878                            (SIT, PtP, NBMA NOARP links) it is handy to allow
879                            some exceptions. --ANK
880                          */
881                         err = -EINVAL;
882                         if (!(gwa_type&IPV6_ADDR_UNICAST))
883                                 goto out;
884
885                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
886
887                         err = -EHOSTUNREACH;
888                         if (grt == NULL)
889                                 goto out;
890                         if (dev) {
891                                 if (dev != grt->rt6i_dev) {
892                                         dst_release(&grt->u.dst);
893                                         goto out;
894                                 }
895                         } else {
896                                 dev = grt->rt6i_dev;
897                                 idev = grt->rt6i_idev;
898                                 dev_hold(dev);
899                                 in6_dev_hold(grt->rt6i_idev);
900                         }
901                         if (!(grt->rt6i_flags&RTF_GATEWAY))
902                                 err = 0;
903                         dst_release(&grt->u.dst);
904
905                         if (err)
906                                 goto out;
907                 }
908                 err = -EINVAL;
909                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
910                         goto out;
911         }
912
913         err = -ENODEV;
914         if (dev == NULL)
915                 goto out;
916
917         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
918                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
919                 if (IS_ERR(rt->rt6i_nexthop)) {
920                         err = PTR_ERR(rt->rt6i_nexthop);
921                         rt->rt6i_nexthop = NULL;
922                         goto out;
923                 }
924         }
925
926         rt->rt6i_flags = rtmsg->rtmsg_flags;
927
928 install_route:
929         if (rta && rta[RTA_METRICS-1]) {
930                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
931                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
932
933                 while (RTA_OK(attr, attrlen)) {
934                         unsigned flavor = attr->rta_type;
935                         if (flavor) {
936                                 if (flavor > RTAX_MAX) {
937                                         err = -EINVAL;
938                                         goto out;
939                                 }
940                                 rt->u.dst.metrics[flavor-1] =
941                                         *(u32 *)RTA_DATA(attr);
942                         }
943                         attr = RTA_NEXT(attr, attrlen);
944                 }
945         }
946
947         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
948                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
949         if (!rt->u.dst.metrics[RTAX_MTU-1])
950                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
951         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
952                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
953         rt->u.dst.dev = dev;
954         rt->rt6i_idev = idev;
955         return ip6_ins_rt(rt, nlh, _rtattr, req);
956
957 out:
958         if (dev)
959                 dev_put(dev);
960         if (idev)
961                 in6_dev_put(idev);
962         if (rt)
963                 dst_free((struct dst_entry *) rt);
964         return err;
965 }
966
967 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
968 {
969         int err;
970
971         write_lock_bh(&rt6_lock);
972
973         err = fib6_del(rt, nlh, _rtattr, req);
974         dst_release(&rt->u.dst);
975
976         write_unlock_bh(&rt6_lock);
977
978         return err;
979 }
980
981 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
982 {
983         struct fib6_node *fn;
984         struct rt6_info *rt;
985         int err = -ESRCH;
986
987         read_lock_bh(&rt6_lock);
988
989         fn = fib6_locate(&ip6_routing_table,
990                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
991                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
992         
993         if (fn) {
994                 for (rt = fn->leaf; rt; rt = rt->u.next) {
995                         if (rtmsg->rtmsg_ifindex &&
996                             (rt->rt6i_dev == NULL ||
997                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
998                                 continue;
999                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1000                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1001                                 continue;
1002                         if (rtmsg->rtmsg_metric &&
1003                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1004                                 continue;
1005                         dst_hold(&rt->u.dst);
1006                         read_unlock_bh(&rt6_lock);
1007
1008                         return ip6_del_rt(rt, nlh, _rtattr, req);
1009                 }
1010         }
1011         read_unlock_bh(&rt6_lock);
1012
1013         return err;
1014 }
1015
1016 /*
1017  *      Handle redirects
1018  */
1019 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1020                   struct neighbour *neigh, u8 *lladdr, int on_link)
1021 {
1022         struct rt6_info *rt, *nrt;
1023
1024         /* Locate old route to this destination. */
1025         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1026
1027         if (rt == NULL)
1028                 return;
1029
1030         if (neigh->dev != rt->rt6i_dev)
1031                 goto out;
1032
1033         /*
1034          * Current route is on-link; redirect is always invalid.
1035          * 
1036          * Seems, previous statement is not true. It could
1037          * be node, which looks for us as on-link (f.e. proxy ndisc)
1038          * But then router serving it might decide, that we should
1039          * know truth 8)8) --ANK (980726).
1040          */
1041         if (!(rt->rt6i_flags&RTF_GATEWAY))
1042                 goto out;
1043
1044         /*
1045          *      RFC 2461 specifies that redirects should only be
1046          *      accepted if they come from the nexthop to the target.
1047          *      Due to the way default routers are chosen, this notion
1048          *      is a bit fuzzy and one might need to check all default
1049          *      routers.
1050          */
1051         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1052                 if (rt->rt6i_flags & RTF_DEFAULT) {
1053                         struct rt6_info *rt1;
1054
1055                         read_lock(&rt6_lock);
1056                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1057                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1058                                         dst_hold(&rt1->u.dst);
1059                                         dst_release(&rt->u.dst);
1060                                         read_unlock(&rt6_lock);
1061                                         rt = rt1;
1062                                         goto source_ok;
1063                                 }
1064                         }
1065                         read_unlock(&rt6_lock);
1066                 }
1067                 if (net_ratelimit())
1068                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1069                                "for redirect target\n");
1070                 goto out;
1071         }
1072
1073 source_ok:
1074
1075         /*
1076          *      We have finally decided to accept it.
1077          */
1078
1079         neigh_update(neigh, lladdr, NUD_STALE, 
1080                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1081                      NEIGH_UPDATE_F_OVERRIDE|
1082                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1083                                      NEIGH_UPDATE_F_ISROUTER))
1084                      );
1085
1086         /*
1087          * Redirect received -> path was valid.
1088          * Look, redirects are sent only in response to data packets,
1089          * so that this nexthop apparently is reachable. --ANK
1090          */
1091         dst_confirm(&rt->u.dst);
1092
1093         /* Duplicate redirect: silently ignore. */
1094         if (neigh == rt->u.dst.neighbour)
1095                 goto out;
1096
1097         nrt = ip6_rt_copy(rt);
1098         if (nrt == NULL)
1099                 goto out;
1100
1101         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1102         if (on_link)
1103                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1104
1105         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1106         nrt->rt6i_dst.plen = 128;
1107         nrt->u.dst.flags |= DST_HOST;
1108
1109         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1110         nrt->rt6i_nexthop = neigh_clone(neigh);
1111         /* Reset pmtu, it may be better */
1112         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1113         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1114
1115         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1116                 goto out;
1117
1118         if (rt->rt6i_flags&RTF_CACHE) {
1119                 ip6_del_rt(rt, NULL, NULL, NULL);
1120                 return;
1121         }
1122
1123 out:
1124         dst_release(&rt->u.dst);
1125         return;
1126 }
1127
1128 /*
1129  *      Handle ICMP "packet too big" messages
1130  *      i.e. Path MTU discovery
1131  */
1132
1133 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1134                         struct net_device *dev, u32 pmtu)
1135 {
1136         struct rt6_info *rt, *nrt;
1137         int allfrag = 0;
1138
1139         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1140         if (rt == NULL)
1141                 return;
1142
1143         if (pmtu >= dst_mtu(&rt->u.dst))
1144                 goto out;
1145
1146         if (pmtu < IPV6_MIN_MTU) {
1147                 /*
1148                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1149                  * MTU (1280) and a fragment header should always be included
1150                  * after a node receiving Too Big message reporting PMTU is
1151                  * less than the IPv6 Minimum Link MTU.
1152                  */
1153                 pmtu = IPV6_MIN_MTU;
1154                 allfrag = 1;
1155         }
1156
1157         /* New mtu received -> path was valid.
1158            They are sent only in response to data packets,
1159            so that this nexthop apparently is reachable. --ANK
1160          */
1161         dst_confirm(&rt->u.dst);
1162
1163         /* Host route. If it is static, it would be better
1164            not to override it, but add new one, so that
1165            when cache entry will expire old pmtu
1166            would return automatically.
1167          */
1168         if (rt->rt6i_flags & RTF_CACHE) {
1169                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1170                 if (allfrag)
1171                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1172                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1173                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1174                 goto out;
1175         }
1176
1177         /* Network route.
1178            Two cases are possible:
1179            1. It is connected route. Action: COW
1180            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1181          */
1182         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1183                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1184         else
1185                 nrt = rt6_alloc_clone(rt, daddr);
1186
1187         if (nrt) {
1188                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1189                 if (allfrag)
1190                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1191
1192                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1193                  * happened within 5 mins, the recommended timer is 10 mins.
1194                  * Here this route expiration time is set to ip6_rt_mtu_expires
1195                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1196                  * and detecting PMTU increase will be automatically happened.
1197                  */
1198                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1199                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1200
1201                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1202         }
1203 out:
1204         dst_release(&rt->u.dst);
1205 }
1206
1207 /*
1208  *      Misc support functions
1209  */
1210
1211 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1212 {
1213         struct rt6_info *rt = ip6_dst_alloc();
1214
1215         if (rt) {
1216                 rt->u.dst.input = ort->u.dst.input;
1217                 rt->u.dst.output = ort->u.dst.output;
1218
1219                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1220                 rt->u.dst.dev = ort->u.dst.dev;
1221                 if (rt->u.dst.dev)
1222                         dev_hold(rt->u.dst.dev);
1223                 rt->rt6i_idev = ort->rt6i_idev;
1224                 if (rt->rt6i_idev)
1225                         in6_dev_hold(rt->rt6i_idev);
1226                 rt->u.dst.lastuse = jiffies;
1227                 rt->rt6i_expires = 0;
1228
1229                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1230                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1231                 rt->rt6i_metric = 0;
1232
1233                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1234 #ifdef CONFIG_IPV6_SUBTREES
1235                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1236 #endif
1237         }
1238         return rt;
1239 }
1240
1241 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1242 {       
1243         struct rt6_info *rt;
1244         struct fib6_node *fn;
1245
1246         fn = &ip6_routing_table;
1247
1248         write_lock_bh(&rt6_lock);
1249         for (rt = fn->leaf; rt; rt=rt->u.next) {
1250                 if (dev == rt->rt6i_dev &&
1251                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1252                         break;
1253         }
1254         if (rt)
1255                 dst_hold(&rt->u.dst);
1256         write_unlock_bh(&rt6_lock);
1257         return rt;
1258 }
1259
1260 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1261                                      struct net_device *dev)
1262 {
1263         struct in6_rtmsg rtmsg;
1264
1265         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1266         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1267         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1268         rtmsg.rtmsg_metric = 1024;
1269         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1270
1271         rtmsg.rtmsg_ifindex = dev->ifindex;
1272
1273         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1274         return rt6_get_dflt_router(gwaddr, dev);
1275 }
1276
1277 void rt6_purge_dflt_routers(void)
1278 {
1279         struct rt6_info *rt;
1280
1281 restart:
1282         read_lock_bh(&rt6_lock);
1283         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1284                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1285                         dst_hold(&rt->u.dst);
1286
1287                         read_unlock_bh(&rt6_lock);
1288
1289                         ip6_del_rt(rt, NULL, NULL, NULL);
1290
1291                         goto restart;
1292                 }
1293         }
1294         read_unlock_bh(&rt6_lock);
1295 }
1296
1297 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1298 {
1299         struct in6_rtmsg rtmsg;
1300         int err;
1301
1302         switch(cmd) {
1303         case SIOCADDRT:         /* Add a route */
1304         case SIOCDELRT:         /* Delete a route */
1305                 if (!capable(CAP_NET_ADMIN))
1306                         return -EPERM;
1307                 err = copy_from_user(&rtmsg, arg,
1308                                      sizeof(struct in6_rtmsg));
1309                 if (err)
1310                         return -EFAULT;
1311                         
1312                 rtnl_lock();
1313                 switch (cmd) {
1314                 case SIOCADDRT:
1315                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1316                         break;
1317                 case SIOCDELRT:
1318                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1319                         break;
1320                 default:
1321                         err = -EINVAL;
1322                 }
1323                 rtnl_unlock();
1324
1325                 return err;
1326         };
1327
1328         return -EINVAL;
1329 }
1330
1331 /*
1332  *      Drop the packet on the floor
1333  */
1334
1335 static int ip6_pkt_discard(struct sk_buff *skb)
1336 {
1337         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1338         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1339         kfree_skb(skb);
1340         return 0;
1341 }
1342
1343 static int ip6_pkt_discard_out(struct sk_buff *skb)
1344 {
1345         skb->dev = skb->dst->dev;
1346         return ip6_pkt_discard(skb);
1347 }
1348
1349 /*
1350  *      Allocate a dst for local (unicast / anycast) address.
1351  */
1352
1353 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1354                                     const struct in6_addr *addr,
1355                                     int anycast)
1356 {
1357         struct rt6_info *rt = ip6_dst_alloc();
1358
1359         if (rt == NULL)
1360                 return ERR_PTR(-ENOMEM);
1361
1362         dev_hold(&loopback_dev);
1363         in6_dev_hold(idev);
1364
1365         rt->u.dst.flags = DST_HOST;
1366         rt->u.dst.input = ip6_input;
1367         rt->u.dst.output = ip6_output;
1368         rt->rt6i_dev = &loopback_dev;
1369         rt->rt6i_idev = idev;
1370         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1371         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1372         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1373         rt->u.dst.obsolete = -1;
1374
1375         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1376         if (anycast)
1377                 rt->rt6i_flags |= RTF_ANYCAST;
1378         else
1379                 rt->rt6i_flags |= RTF_LOCAL;
1380         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1381         if (rt->rt6i_nexthop == NULL) {
1382                 dst_free((struct dst_entry *) rt);
1383                 return ERR_PTR(-ENOMEM);
1384         }
1385
1386         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1387         rt->rt6i_dst.plen = 128;
1388
1389         atomic_set(&rt->u.dst.__refcnt, 1);
1390
1391         return rt;
1392 }
1393
1394 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1395 {
1396         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1397             rt != &ip6_null_entry) {
1398                 RT6_TRACE("deleted by ifdown %p\n", rt);
1399                 return -1;
1400         }
1401         return 0;
1402 }
1403
1404 void rt6_ifdown(struct net_device *dev)
1405 {
1406         write_lock_bh(&rt6_lock);
1407         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1408         write_unlock_bh(&rt6_lock);
1409 }
1410
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1416
1417 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1418 {
1419         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1420         struct inet6_dev *idev;
1421
1422         /* In IPv6 pmtu discovery is not optional,
1423            so that RTAX_MTU lock cannot disable it.
1424            We still use this lock to block changes
1425            caused by addrconf/ndisc.
1426         */
1427
1428         idev = __in6_dev_get(arg->dev);
1429         if (idev == NULL)
1430                 return 0;
1431
1432         /* For administrative MTU increase, there is no way to discover
1433            IPv6 PMTU increase, so PMTU increase should be updated here.
1434            Since RFC 1981 doesn't include administrative MTU increase
1435            update PMTU increase is a MUST. (i.e. jumbo frame)
1436          */
1437         /*
1438            If new MTU is less than route PMTU, this new MTU will be the
1439            lowest MTU in the path, update the route PMTU to reflect PMTU
1440            decreases; if new MTU is greater than route PMTU, and the
1441            old MTU is the lowest MTU in the path, update the route PMTU
1442            to reflect the increase. In this case if the other nodes' MTU
1443            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1444            PMTU discouvery.
1445          */
1446         if (rt->rt6i_dev == arg->dev &&
1447             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1448             (dst_mtu(&rt->u.dst) > arg->mtu ||
1449              (dst_mtu(&rt->u.dst) < arg->mtu &&
1450               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1451                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1452         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1453         return 0;
1454 }
1455
1456 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1457 {
1458         struct rt6_mtu_change_arg arg;
1459
1460         arg.dev = dev;
1461         arg.mtu = mtu;
1462         read_lock_bh(&rt6_lock);
1463         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1464         read_unlock_bh(&rt6_lock);
1465 }
1466
1467 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1468                               struct in6_rtmsg *rtmsg)
1469 {
1470         memset(rtmsg, 0, sizeof(*rtmsg));
1471
1472         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1473         rtmsg->rtmsg_src_len = r->rtm_src_len;
1474         rtmsg->rtmsg_flags = RTF_UP;
1475         if (r->rtm_type == RTN_UNREACHABLE)
1476                 rtmsg->rtmsg_flags |= RTF_REJECT;
1477
1478         if (rta[RTA_GATEWAY-1]) {
1479                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1480                         return -EINVAL;
1481                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1482                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1483         }
1484         if (rta[RTA_DST-1]) {
1485                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1486                         return -EINVAL;
1487                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1488         }
1489         if (rta[RTA_SRC-1]) {
1490                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1491                         return -EINVAL;
1492                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1493         }
1494         if (rta[RTA_OIF-1]) {
1495                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1496                         return -EINVAL;
1497                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1498         }
1499         if (rta[RTA_PRIORITY-1]) {
1500                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1501                         return -EINVAL;
1502                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1503         }
1504         return 0;
1505 }
1506
1507 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1508 {
1509         struct rtmsg *r = NLMSG_DATA(nlh);
1510         struct in6_rtmsg rtmsg;
1511
1512         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1513                 return -EINVAL;
1514         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1515 }
1516
1517 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1518 {
1519         struct rtmsg *r = NLMSG_DATA(nlh);
1520         struct in6_rtmsg rtmsg;
1521
1522         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1523                 return -EINVAL;
1524         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1525 }
1526
/* Per-call context given to the FIB walker while dumping routes. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the route messages go into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1532
1533 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1534                          struct in6_addr *dst, struct in6_addr *src,
1535                          int iif, int type, u32 pid, u32 seq,
1536                          int prefix, unsigned int flags)
1537 {
1538         struct rtmsg *rtm;
1539         struct nlmsghdr  *nlh;
1540         unsigned char    *b = skb->tail;
1541         struct rta_cacheinfo ci;
1542
1543         if (prefix) {   /* user wants prefix routes only */
1544                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1545                         /* success since this is not a prefix route */
1546                         return 1;
1547                 }
1548         }
1549
1550         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1551         rtm = NLMSG_DATA(nlh);
1552         rtm->rtm_family = AF_INET6;
1553         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1554         rtm->rtm_src_len = rt->rt6i_src.plen;
1555         rtm->rtm_tos = 0;
1556         rtm->rtm_table = RT_TABLE_MAIN;
1557         if (rt->rt6i_flags&RTF_REJECT)
1558                 rtm->rtm_type = RTN_UNREACHABLE;
1559         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1560                 rtm->rtm_type = RTN_LOCAL;
1561         else
1562                 rtm->rtm_type = RTN_UNICAST;
1563         rtm->rtm_flags = 0;
1564         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1565         rtm->rtm_protocol = rt->rt6i_protocol;
1566         if (rt->rt6i_flags&RTF_DYNAMIC)
1567                 rtm->rtm_protocol = RTPROT_REDIRECT;
1568         else if (rt->rt6i_flags & RTF_ADDRCONF)
1569                 rtm->rtm_protocol = RTPROT_KERNEL;
1570         else if (rt->rt6i_flags&RTF_DEFAULT)
1571                 rtm->rtm_protocol = RTPROT_RA;
1572
1573         if (rt->rt6i_flags&RTF_CACHE)
1574                 rtm->rtm_flags |= RTM_F_CLONED;
1575
1576         if (dst) {
1577                 RTA_PUT(skb, RTA_DST, 16, dst);
1578                 rtm->rtm_dst_len = 128;
1579         } else if (rtm->rtm_dst_len)
1580                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1581 #ifdef CONFIG_IPV6_SUBTREES
1582         if (src) {
1583                 RTA_PUT(skb, RTA_SRC, 16, src);
1584                 rtm->rtm_src_len = 128;
1585         } else if (rtm->rtm_src_len)
1586                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1587 #endif
1588         if (iif)
1589                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1590         else if (dst) {
1591                 struct in6_addr saddr_buf;
1592                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1593                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1594         }
1595         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1596                 goto rtattr_failure;
1597         if (rt->u.dst.neighbour)
1598                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1599         if (rt->u.dst.dev)
1600                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1601         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1602         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1603         if (rt->rt6i_expires)
1604                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1605         else
1606                 ci.rta_expires = 0;
1607         ci.rta_used = rt->u.dst.__use;
1608         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1609         ci.rta_error = rt->u.dst.error;
1610         ci.rta_id = 0;
1611         ci.rta_ts = 0;
1612         ci.rta_tsage = 0;
1613         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1614         nlh->nlmsg_len = skb->tail - b;
1615         return skb->len;
1616
1617 nlmsg_failure:
1618 rtattr_failure:
1619         skb_trim(skb, b - skb->data);
1620         return -1;
1621 }
1622
1623 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1624 {
1625         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1626         int prefix;
1627
1628         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1629                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1630                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1631         } else
1632                 prefix = 0;
1633
1634         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1635                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1636                      prefix, NLM_F_MULTI);
1637 }
1638
1639 static int fib6_dump_node(struct fib6_walker_t *w)
1640 {
1641         int res;
1642         struct rt6_info *rt;
1643
1644         for (rt = w->leaf; rt; rt = rt->u.next) {
1645                 res = rt6_dump_route(rt, w->args);
1646                 if (res < 0) {
1647                         /* Frame is full, suspend walking */
1648                         w->leaf = rt;
1649                         return 1;
1650                 }
1651                 BUG_TRAP(res!=0);
1652         }
1653         w->leaf = NULL;
1654         return 0;
1655 }
1656
1657 static void fib6_dump_end(struct netlink_callback *cb)
1658 {
1659         struct fib6_walker_t *w = (void*)cb->args[0];
1660
1661         if (w) {
1662                 cb->args[0] = 0;
1663                 fib6_walker_unlink(w);
1664                 kfree(w);
1665         }
1666         cb->done = (void*)cb->args[1];
1667         cb->args[1] = 0;
1668 }
1669
1670 static int fib6_dump_done(struct netlink_callback *cb)
1671 {
1672         fib6_dump_end(cb);
1673         return cb->done ? cb->done(cb) : 0;
1674 }
1675
1676 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1677 {
1678         struct rt6_rtnl_dump_arg arg;
1679         struct fib6_walker_t *w;
1680         int res;
1681
1682         arg.skb = skb;
1683         arg.cb = cb;
1684
1685         w = (void*)cb->args[0];
1686         if (w == NULL) {
1687                 /* New dump:
1688                  * 
1689                  * 1. hook callback destructor.
1690                  */
1691                 cb->args[1] = (long)cb->done;
1692                 cb->done = fib6_dump_done;
1693
1694                 /*
1695                  * 2. allocate and initialize walker.
1696                  */
1697                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1698                 if (w == NULL)
1699                         return -ENOMEM;
1700                 RT6_TRACE("dump<%p", w);
1701                 memset(w, 0, sizeof(*w));
1702                 w->root = &ip6_routing_table;
1703                 w->func = fib6_dump_node;
1704                 w->args = &arg;
1705                 cb->args[0] = (long)w;
1706                 read_lock_bh(&rt6_lock);
1707                 res = fib6_walk(w);
1708                 read_unlock_bh(&rt6_lock);
1709         } else {
1710                 w->args = &arg;
1711                 read_lock_bh(&rt6_lock);
1712                 res = fib6_walk_continue(w);
1713                 read_unlock_bh(&rt6_lock);
1714         }
1715 #if RT6_DEBUG >= 3
1716         if (res <= 0 && skb->len == 0)
1717                 RT6_TRACE("%p>dump end\n", w);
1718 #endif
1719         res = res < 0 ? res : skb->len;
1720         /* res < 0 is an error. (really, impossible)
1721            res == 0 means that dump is complete, but skb still can contain data.
1722            res > 0 dump is not complete, but frame is full.
1723          */
1724         /* Destroy walker, if dump of this table is complete. */
1725         if (res <= 0)
1726                 fib6_dump_end(cb);
1727         return res;
1728 }
1729
1730 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1731 {
1732         struct rtattr **rta = arg;
1733         int iif = 0;
1734         int err = -ENOBUFS;
1735         struct sk_buff *skb;
1736         struct flowi fl;
1737         struct rt6_info *rt;
1738
1739         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1740         if (skb == NULL)
1741                 goto out;
1742
1743         /* Reserve room for dummy headers, this skb can pass
1744            through good chunk of routing engine.
1745          */
1746         skb->mac.raw = skb->data;
1747         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1748
1749         memset(&fl, 0, sizeof(fl));
1750         if (rta[RTA_SRC-1])
1751                 ipv6_addr_copy(&fl.fl6_src,
1752                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1753         if (rta[RTA_DST-1])
1754                 ipv6_addr_copy(&fl.fl6_dst,
1755                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1756
1757         if (rta[RTA_IIF-1])
1758                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1759
1760         if (iif) {
1761                 struct net_device *dev;
1762                 dev = __dev_get_by_index(iif);
1763                 if (!dev) {
1764                         err = -ENODEV;
1765                         goto out_free;
1766                 }
1767         }
1768
1769         fl.oif = 0;
1770         if (rta[RTA_OIF-1])
1771                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1772
1773         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1774
1775         skb->dst = &rt->u.dst;
1776
1777         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1778         err = rt6_fill_node(skb, rt, 
1779                             &fl.fl6_dst, &fl.fl6_src,
1780                             iif,
1781                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1782                             nlh->nlmsg_seq, 0, 0);
1783         if (err < 0) {
1784                 err = -EMSGSIZE;
1785                 goto out_free;
1786         }
1787
1788         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1789         if (err > 0)
1790                 err = 0;
1791 out:
1792         return err;
1793 out_free:
1794         kfree_skb(skb);
1795         goto out;       
1796 }
1797
1798 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1799                         struct netlink_skb_parms *req)
1800 {
1801         struct sk_buff *skb;
1802         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1803         u32 pid = current->pid;
1804         u32 seq = 0;
1805
1806         if (req)
1807                 pid = req->pid;
1808         if (nlh)
1809                 seq = nlh->nlmsg_seq;
1810         
1811         skb = alloc_skb(size, gfp_any());
1812         if (!skb) {
1813                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1814                 return;
1815         }
1816         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1817                 kfree_skb(skb);
1818                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1819                 return;
1820         }
1821         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1822         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1823 }
1824
1825 /*
1826  *      /proc
1827  */
1828
1829 #ifdef CONFIG_PROC_FS
1830
/*
 * Nominal size in bytes (150) of one text record produced by
 * rt6_info_route(); used to convert a read offset into a record index
 * and an offset within that record.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
1841
1842 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1843 {
1844         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1845         int i;
1846
1847         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1848                 arg->skip++;
1849                 return 0;
1850         }
1851
1852         if (arg->len >= arg->length)
1853                 return 0;
1854
1855         for (i=0; i<16; i++) {
1856                 sprintf(arg->buffer + arg->len, "%02x",
1857                         rt->rt6i_dst.addr.s6_addr[i]);
1858                 arg->len += 2;
1859         }
1860         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1861                             rt->rt6i_dst.plen);
1862
1863 #ifdef CONFIG_IPV6_SUBTREES
1864         for (i=0; i<16; i++) {
1865                 sprintf(arg->buffer + arg->len, "%02x",
1866                         rt->rt6i_src.addr.s6_addr[i]);
1867                 arg->len += 2;
1868         }
1869         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1870                             rt->rt6i_src.plen);
1871 #else
1872         sprintf(arg->buffer + arg->len,
1873                 "00000000000000000000000000000000 00 ");
1874         arg->len += 36;
1875 #endif
1876
1877         if (rt->rt6i_nexthop) {
1878                 for (i=0; i<16; i++) {
1879                         sprintf(arg->buffer + arg->len, "%02x",
1880                                 rt->rt6i_nexthop->primary_key[i]);
1881                         arg->len += 2;
1882                 }
1883         } else {
1884                 sprintf(arg->buffer + arg->len,
1885                         "00000000000000000000000000000000");
1886                 arg->len += 32;
1887         }
1888         arg->len += sprintf(arg->buffer + arg->len,
1889                             " %08x %08x %08x %08x %8s\n",
1890                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1891                             rt->u.dst.__use, rt->rt6i_flags, 
1892                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1893         return 0;
1894 }
1895
1896 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1897 {
1898         struct rt6_proc_arg arg;
1899         arg.buffer = buffer;
1900         arg.offset = offset;
1901         arg.length = length;
1902         arg.skip = 0;
1903         arg.len = 0;
1904
1905         read_lock_bh(&rt6_lock);
1906         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1907         read_unlock_bh(&rt6_lock);
1908
1909         *start = buffer;
1910         if (offset)
1911                 *start += offset % RT6_INFO_LEN;
1912
1913         arg.len -= offset % RT6_INFO_LEN;
1914
1915         if (arg.len > length)
1916                 arg.len = length;
1917         if (arg.len < 0)
1918                 arg.len = 0;
1919
1920         return arg.len;
1921 }
1922
1923 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1924 {
1925         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1926                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1927                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1928                       rt6_stats.fib_rt_cache,
1929                       atomic_read(&ip6_dst_ops.entries),
1930                       rt6_stats.fib_discarded_routes);
1931
1932         return 0;
1933 }
1934
1935 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1936 {
1937         return single_open(file, rt6_stats_seq_show, NULL);
1938 }
1939
1940 static struct file_operations rt6_stats_seq_fops = {
1941         .owner   = THIS_MODULE,
1942         .open    = rt6_stats_seq_open,
1943         .read    = seq_read,
1944         .llseek  = seq_lseek,
1945         .release = single_release,
1946 };
1947 #endif  /* CONFIG_PROC_FS */
1948
1949 #ifdef CONFIG_SYSCTL
1950
1951 static int flush_delay;
1952
1953 static
1954 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1955                               void __user *buffer, size_t *lenp, loff_t *ppos)
1956 {
1957         if (write) {
1958                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1959                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1960                 return 0;
1961         } else
1962                 return -EINVAL;
1963 }
1964
1965 ctl_table ipv6_route_table[] = {
1966         {
1967                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1968                 .procname       =       "flush",
1969                 .data           =       &flush_delay,
1970                 .maxlen         =       sizeof(int),
1971                 .mode           =       0200,
1972                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1973         },
1974         {
1975                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1976                 .procname       =       "gc_thresh",
1977                 .data           =       &ip6_dst_ops.gc_thresh,
1978                 .maxlen         =       sizeof(int),
1979                 .mode           =       0644,
1980                 .proc_handler   =       &proc_dointvec,
1981         },
1982         {
1983                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1984                 .procname       =       "max_size",
1985                 .data           =       &ip6_rt_max_size,
1986                 .maxlen         =       sizeof(int),
1987                 .mode           =       0644,
1988                 .proc_handler   =       &proc_dointvec,
1989         },
1990         {
1991                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1992                 .procname       =       "gc_min_interval",
1993                 .data           =       &ip6_rt_gc_min_interval,
1994                 .maxlen         =       sizeof(int),
1995                 .mode           =       0644,
1996                 .proc_handler   =       &proc_dointvec_jiffies,
1997                 .strategy       =       &sysctl_jiffies,
1998         },
1999         {
2000                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2001                 .procname       =       "gc_timeout",
2002                 .data           =       &ip6_rt_gc_timeout,
2003                 .maxlen         =       sizeof(int),
2004                 .mode           =       0644,
2005                 .proc_handler   =       &proc_dointvec_jiffies,
2006                 .strategy       =       &sysctl_jiffies,
2007         },
2008         {
2009                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2010                 .procname       =       "gc_interval",
2011                 .data           =       &ip6_rt_gc_interval,
2012                 .maxlen         =       sizeof(int),
2013                 .mode           =       0644,
2014                 .proc_handler   =       &proc_dointvec_jiffies,
2015                 .strategy       =       &sysctl_jiffies,
2016         },
2017         {
2018                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2019                 .procname       =       "gc_elasticity",
2020                 .data           =       &ip6_rt_gc_elasticity,
2021                 .maxlen         =       sizeof(int),
2022                 .mode           =       0644,
2023                 .proc_handler   =       &proc_dointvec_jiffies,
2024                 .strategy       =       &sysctl_jiffies,
2025         },
2026         {
2027                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2028                 .procname       =       "mtu_expires",
2029                 .data           =       &ip6_rt_mtu_expires,
2030                 .maxlen         =       sizeof(int),
2031                 .mode           =       0644,
2032                 .proc_handler   =       &proc_dointvec_jiffies,
2033                 .strategy       =       &sysctl_jiffies,
2034         },
2035         {
2036                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2037                 .procname       =       "min_adv_mss",
2038                 .data           =       &ip6_rt_min_advmss,
2039                 .maxlen         =       sizeof(int),
2040                 .mode           =       0644,
2041                 .proc_handler   =       &proc_dointvec_jiffies,
2042                 .strategy       =       &sysctl_jiffies,
2043         },
2044         {
2045                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2046                 .procname       =       "gc_min_interval_ms",
2047                 .data           =       &ip6_rt_gc_min_interval,
2048                 .maxlen         =       sizeof(int),
2049                 .mode           =       0644,
2050                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2051                 .strategy       =       &sysctl_ms_jiffies,
2052         },
2053         { .ctl_name = 0 }
2054 };
2055
2056 #endif
2057
2058 void __init ip6_route_init(void)
2059 {
2060         struct proc_dir_entry *p;
2061
2062         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2063                                                      sizeof(struct rt6_info),
2064                                                      0, SLAB_HWCACHE_ALIGN,
2065                                                      NULL, NULL);
2066         if (!ip6_dst_ops.kmem_cachep)
2067                 panic("cannot create ip6_dst_cache");
2068
2069         fib6_init();
2070 #ifdef  CONFIG_PROC_FS
2071         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2072         if (p)
2073                 p->owner = THIS_MODULE;
2074
2075         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2076 #endif
2077 #ifdef CONFIG_XFRM
2078         xfrm6_init();
2079 #endif
2080 }
2081
2082 void ip6_route_cleanup(void)
2083 {
2084 #ifdef CONFIG_PROC_FS
2085         proc_net_remove("ipv6_route");
2086         proc_net_remove("rt6_stats");
2087 #endif
2088 #ifdef CONFIG_XFRM
2089         xfrm6_fini();
2090 #endif
2091         rt6_ifdown(NULL);
2092         fib6_gc_cleanup();
2093         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2094 }