[IPV4] FIB: Include nexthop device indexes in fib_info hashfn.
net/ipv4/fib_semantics.c
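This change folds each nexthop's output interface index into fib_info_hashfn()
(via fib_devindex_hashfn()), so that fib_info entries which differ only in
their nexthop interfaces need not share a hash chain. The nexthop loop now
present in fib_info_hashfn() (reproduced from the listing below) is:

        for_nexthops(fi) {
                val ^= fib_devindex_hashfn(nh->nh_oif);
        } endfor_nexthops(fi)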
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
47
48 #include "fib_lookup.h"
49
50 static DEFINE_SPINLOCK(fib_info_lock);
51 static struct hlist_head *fib_info_hash;
52 static struct hlist_head *fib_info_laddrhash;
53 static unsigned int fib_hash_size;
54 static unsigned int fib_info_cnt;
55
56 #define DEVINDEX_HASHBITS 8
57 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
59
60 #ifdef CONFIG_IP_ROUTE_MULTIPATH
61
62 static DEFINE_SPINLOCK(fib_multipath_lock);
63
64 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
65 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
66
67 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
68 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
69
70 #else /* CONFIG_IP_ROUTE_MULTIPATH */
71
72 /* Hope that gcc will optimize away the dummy loop */
73
74 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
75 for (nhsel=0; nhsel < 1; nhsel++)
76
77 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
78 for (nhsel=0; nhsel < 1; nhsel++)
79
80 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
81
82 #define endfor_nexthops(fi) }
83
84
85 static const struct
86 {
87         int     error;
88         u8      scope;
89 } fib_props[RTN_MAX + 1] = {
90         {
91                 .error  = 0,
92                 .scope  = RT_SCOPE_NOWHERE,
93         },      /* RTN_UNSPEC */
94         {
95                 .error  = 0,
96                 .scope  = RT_SCOPE_UNIVERSE,
97         },      /* RTN_UNICAST */
98         {
99                 .error  = 0,
100                 .scope  = RT_SCOPE_HOST,
101         },      /* RTN_LOCAL */
102         {
103                 .error  = 0,
104                 .scope  = RT_SCOPE_LINK,
105         },      /* RTN_BROADCAST */
106         {
107                 .error  = 0,
108                 .scope  = RT_SCOPE_LINK,
109         },      /* RTN_ANYCAST */
110         {
111                 .error  = 0,
112                 .scope  = RT_SCOPE_UNIVERSE,
113         },      /* RTN_MULTICAST */
114         {
115                 .error  = -EINVAL,
116                 .scope  = RT_SCOPE_UNIVERSE,
117         },      /* RTN_BLACKHOLE */
118         {
119                 .error  = -EHOSTUNREACH,
120                 .scope  = RT_SCOPE_UNIVERSE,
121         },      /* RTN_UNREACHABLE */
122         {
123                 .error  = -EACCES,
124                 .scope  = RT_SCOPE_UNIVERSE,
125         },      /* RTN_PROHIBIT */
126         {
127                 .error  = -EAGAIN,
128                 .scope  = RT_SCOPE_UNIVERSE,
129         },      /* RTN_THROW */
130         {
131                 .error  = -EINVAL,
132                 .scope  = RT_SCOPE_NOWHERE,
133         },      /* RTN_NAT */
134         {
135                 .error  = -EINVAL,
136                 .scope  = RT_SCOPE_NOWHERE,
137         },      /* RTN_XRESOLVE */
138 };
139
140
141 /* Release a nexthop info record */
142
143 void free_fib_info(struct fib_info *fi)
144 {
145         if (fi->fib_dead == 0) {
146                 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
147                 return;
148         }
149         change_nexthops(fi) {
150                 if (nh->nh_dev)
151                         dev_put(nh->nh_dev);
152                 nh->nh_dev = NULL;
153         } endfor_nexthops(fi);
154         fib_info_cnt--;
155         kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160         spin_lock_bh(&fib_info_lock);
161         if (fi && --fi->fib_treeref == 0) {
162                 hlist_del(&fi->fib_hash);
163                 if (fi->fib_prefsrc)
164                         hlist_del(&fi->fib_lhash);
165                 change_nexthops(fi) {
166                         if (!nh->nh_dev)
167                                 continue;
168                         hlist_del(&nh->nh_hash);
169                 } endfor_nexthops(fi)
170                 fi->fib_dead = 1;
171                 fib_info_put(fi);
172         }
173         spin_unlock_bh(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178         const struct fib_nh *onh = ofi->fib_nh;
179
180         for_nexthops(fi) {
181                 if (nh->nh_oif != onh->nh_oif ||
182                     nh->nh_gw  != onh->nh_gw ||
183                     nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185                     nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188                     nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191                         return -1;
192                 onh++;
193         } endfor_nexthops(fi);
194         return 0;
195 }
196
197 static inline unsigned int fib_devindex_hashfn(unsigned int val)
198 {
199         unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201         return (val ^
202                 (val >> DEVINDEX_HASHBITS) ^
203                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204 }
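/* Illustration (not from the original source): with DEVINDEX_HASHBITS == 8,
 * a (deliberately large) ifindex of 0x12345 folds as
 *   (0x12345 ^ 0x00123 ^ 0x00001) & 0xff == 0x12267 & 0xff == 0x67,
 * so all byte-sized slices of the index contribute to the bucket choice.
 */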
205
206 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207 {
208         unsigned int mask = (fib_hash_size - 1);
209         unsigned int val = fi->fib_nhs;
210
211         val ^= fi->fib_protocol;
212         val ^= (__force u32)fi->fib_prefsrc;
213         val ^= fi->fib_priority;
214         for_nexthops(fi) {
215                 val ^= fib_devindex_hashfn(nh->nh_oif);
216         } endfor_nexthops(fi)
217
218         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219 }
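/* Every field folded into fib_info_hashfn() above is also compared by
 * fib_find_info()/nh_comp() below, and fib_hash_move() recomputes this hash
 * when the tables are resized, so none of these inputs may change while a
 * fib_info is linked into fib_info_hash.
 */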
220
221 static struct fib_info *fib_find_info(const struct fib_info *nfi)
222 {
223         struct hlist_head *head;
224         struct hlist_node *node;
225         struct fib_info *fi;
226         unsigned int hash;
227
228         hash = fib_info_hashfn(nfi);
229         head = &fib_info_hash[hash];
230
231         hlist_for_each_entry(fi, node, head, fib_hash) {
232                 if (fi->fib_nhs != nfi->fib_nhs)
233                         continue;
234                 if (nfi->fib_protocol == fi->fib_protocol &&
235                     nfi->fib_prefsrc == fi->fib_prefsrc &&
236                     nfi->fib_priority == fi->fib_priority &&
237                     memcmp(nfi->fib_metrics, fi->fib_metrics,
238                            sizeof(fi->fib_metrics)) == 0 &&
239                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
240                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
241                         return fi;
242         }
243
244         return NULL;
245 }
246
247 /* Check that the gateway is already configured.
248    Used only by the redirect accept routine.
249  */
250
251 int ip_fib_check_default(__be32 gw, struct net_device *dev)
252 {
253         struct hlist_head *head;
254         struct hlist_node *node;
255         struct fib_nh *nh;
256         unsigned int hash;
257
258         spin_lock(&fib_info_lock);
259
260         hash = fib_devindex_hashfn(dev->ifindex);
261         head = &fib_info_devhash[hash];
262         hlist_for_each_entry(nh, node, head, nh_hash) {
263                 if (nh->nh_dev == dev &&
264                     nh->nh_gw == gw &&
265                     !(nh->nh_flags&RTNH_F_DEAD)) {
266                         spin_unlock(&fib_info_lock);
267                         return 0;
268                 }
269         }
270
271         spin_unlock(&fib_info_lock);
272
273         return -1;
274 }
275
276 static inline size_t fib_nlmsg_size(struct fib_info *fi)
277 {
278         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279                          + nla_total_size(4) /* RTA_TABLE */
280                          + nla_total_size(4) /* RTA_DST */
281                          + nla_total_size(4) /* RTA_PRIORITY */
282                          + nla_total_size(4); /* RTA_PREFSRC */
283
284         /* space for nested metrics */
285         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287         if (fi->fib_nhs) {
288                 /* Also handles the special case fib_nhs == 1 */
289
290                 /* each nexthop is packed in an attribute */
291                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293                 /* may contain flow and gateway attribute */
294                 nhsize += 2 * nla_total_size(4);
295
296                 /* all nexthops are packed in a nested attribute */
297                 payload += nla_total_size(fi->fib_nhs * nhsize);
298         }
299
300         return payload;
301 }
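/* Worked example (illustrative; exact values depend on the build): with
 * NLA_HDRLEN == 4, sizeof(struct rtmsg) == 12 and sizeof(struct rtnexthop) == 8,
 * nla_total_size(4) == 8, so the fixed part above is 12 + 4*8 == 44 bytes plus
 * the nested metrics estimate. Each nexthop then costs
 *   nla_total_size(8) + 2*nla_total_size(4) == 12 + 16 == 28
 * bytes, so a two-nexthop route reserves nla_total_size(2*28) == 60 bytes
 * for the RTA_MULTIPATH attribute.
 */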
302
303 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
304                int dst_len, u32 tb_id, struct nl_info *info,
305                unsigned int nlm_flags)
306 {
307         struct sk_buff *skb;
308         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
309         int err = -ENOBUFS;
310
311         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
312         if (skb == NULL)
313                 goto errout;
314
315         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
316                             fa->fa_type, fa->fa_scope, key, dst_len,
317                             fa->fa_tos, fa->fa_info, nlm_flags);
318         if (err < 0) {
319                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
320                 WARN_ON(err == -EMSGSIZE);
321                 kfree_skb(skb);
322                 goto errout;
323         }
324         err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
325                           info->nlh, GFP_KERNEL);
326 errout:
327         if (err < 0)
328                 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
329 }
330
331 /* Return the first fib alias whose TOS is <= tos and, when the TOS
332  * matches exactly, whose priority is >= prio.
333  */
334 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
335 {
336         if (fah) {
337                 struct fib_alias *fa;
338                 list_for_each_entry(fa, fah, fa_list) {
339                         if (fa->fa_tos > tos)
340                                 continue;
341                         if (fa->fa_info->fib_priority >= prio ||
342                             fa->fa_tos < tos)
343                                 return fa;
344                 }
345         }
346         return NULL;
347 }
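/* The callers are expected to keep fa_list sorted with larger TOS values
 * first, so the alias returned here also serves as the insertion point for
 * a new alias with this tos/prio.
 */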
348
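/* fib_detect_death() returns 0 ("alive") when the neighbour (ARP) entry for
 * the route's first gateway is NUD_REACHABLE, or merely valid while
 * order != dflt; otherwise it returns 1, remembering the best last-resort
 * candidate seen so far in *last_resort/*last_idx. The callers use this when
 * choosing among several default routes.
 */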
349 int fib_detect_death(struct fib_info *fi, int order,
350                      struct fib_info **last_resort, int *last_idx, int dflt)
351 {
352         struct neighbour *n;
353         int state = NUD_NONE;
354
355         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
356         if (n) {
357                 state = n->nud_state;
358                 neigh_release(n);
359         }
360         if (state==NUD_REACHABLE)
361                 return 0;
362         if ((state&NUD_VALID) && order != dflt)
363                 return 0;
364         if ((state&NUD_VALID) ||
365             (*last_idx<0 && order > dflt)) {
366                 *last_resort = fi;
367                 *last_idx = order;
368         }
369         return 1;
370 }
371
372 #ifdef CONFIG_IP_ROUTE_MULTIPATH
373
374 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
375 {
376         int nhs = 0;
377
378         while (rtnh_ok(rtnh, remaining)) {
379                 nhs++;
380                 rtnh = rtnh_next(rtnh, &remaining);
381         }
382
383         /* leftover implies invalid nexthop configuration, discard it */
384         return remaining > 0 ? 0 : nhs;
385 }
386
387 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
388                        int remaining, struct fib_config *cfg)
389 {
390         change_nexthops(fi) {
391                 int attrlen;
392
393                 if (!rtnh_ok(rtnh, remaining))
394                         return -EINVAL;
395
396                 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
397                 nh->nh_oif = rtnh->rtnh_ifindex;
398                 nh->nh_weight = rtnh->rtnh_hops + 1;
399
400                 attrlen = rtnh_attrlen(rtnh);
401                 if (attrlen > 0) {
402                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
403
404                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
405                         nh->nh_gw = nla ? nla_get_be32(nla) : 0;
406 #ifdef CONFIG_NET_CLS_ROUTE
407                         nla = nla_find(attrs, attrlen, RTA_FLOW);
408                         nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
409 #endif
410                 }
411
412                 rtnh = rtnh_next(rtnh, &remaining);
413         } endfor_nexthops(fi);
414
415         return 0;
416 }
417
418 #endif
419
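/* Match the nexthop specification of a route request (cfg) against an
 * existing fib_info: 0 means "matches", 1 means "does not match", and a
 * negative errno means the request itself is malformed. For single-path
 * requests only the first nexthop's oif/gw is compared.
 */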
420 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
421 {
422 #ifdef CONFIG_IP_ROUTE_MULTIPATH
423         struct rtnexthop *rtnh;
424         int remaining;
425 #endif
426
427         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
428                 return 1;
429
430         if (cfg->fc_oif || cfg->fc_gw) {
431                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
432                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
433                         return 0;
434                 return 1;
435         }
436
437 #ifdef CONFIG_IP_ROUTE_MULTIPATH
438         if (cfg->fc_mp == NULL)
439                 return 0;
440
441         rtnh = cfg->fc_mp;
442         remaining = cfg->fc_mp_len;
443
444         for_nexthops(fi) {
445                 int attrlen;
446
447                 if (!rtnh_ok(rtnh, remaining))
448                         return -EINVAL;
449
450                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
451                         return 1;
452
453                 attrlen = rtnh_attrlen(rtnh);
454                 if (attrlen > 0) {
455                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
456
457                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
458                         if (nla && nla_get_be32(nla) != nh->nh_gw)
459                                 return 1;
460 #ifdef CONFIG_NET_CLS_ROUTE
461                         nla = nla_find(attrs, attrlen, RTA_FLOW);
462                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
463                                 return 1;
464 #endif
465                 }
466
467                 rtnh = rtnh_next(rtnh, &remaining);
468         } endfor_nexthops(fi);
469 #endif
470         return 0;
471 }
472
473
474 /*
475    Picture
476    -------
477
478    The semantics of nexthops is very messy for historical reasons.
479    We have to take into account that:
480    a) the gateway can actually be a local interface address,
481       so that a gatewayed route is direct.
482    b) the gateway must be an on-link address, possibly
483       described not by an ifaddr, but also by a direct route.
484    c) if both gateway and interface are specified, they should not
485       contradict each other.
486    d) if we use tunnel routes, the gateway may not be on-link.
487
488    Attempting to reconcile all of these (alas, self-contradictory) conditions
489    results in pretty ugly and hairy code with obscure logic.
490
491    I chose to generalize it instead, so that the size
492    of the code does not increase practically, but it becomes
493    much more general.
494    Every prefix is assigned a "scope" value: "host" is a local address,
495    "link" is a direct route,
496    [ ... "site" ... "interior" ... ]
497    and "universe" is a true gateway route with global meaning.
498
499    Every prefix refers to a set of "nexthop"s (gw, oif),
500    where the gw must have narrower scope. This recursion stops
501    when the gw has LOCAL scope or when the "nexthop" is declared ONLINK,
502    which means that the gw is forced to be on-link.
503
504    The code is still hairy, but now it is apparently logically
505    consistent and very flexible. E.g., as a by-product, it allows
506    independent exterior and interior routing processes to
507    coexist in peace.
508
509    Normally it looks as follows:
510
511    {universe prefix}  -> (gw, oif) [scope link]
512                           |
513                           |-> {link prefix} -> (gw, oif) [scope local]
514                                                 |
515                                                 |-> {local prefix} (terminal node)
516  */
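/* A concrete illustration (the addresses are examples, not taken from this
 * code):
 *
 *      default via 192.0.2.1 dev eth0          [scope universe]
 *      192.0.2.0/24 dev eth0                   [scope link]
 *      local 192.0.2.10 dev eth0               [scope host]
 *
 * When the first route is added, fib_check_nh() below looks up the gateway
 * 192.0.2.1 with a scope strictly narrower than the route's own, finds the
 * connected link-scope prefix, and records that scope and device in the
 * nexthop; the link prefix in turn terminates at the local address.
 */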
517
518 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
519                         struct fib_nh *nh)
520 {
521         int err;
522
523         if (nh->nh_gw) {
524                 struct fib_result res;
525
526 #ifdef CONFIG_IP_ROUTE_PERVASIVE
527                 if (nh->nh_flags&RTNH_F_PERVASIVE)
528                         return 0;
529 #endif
530                 if (nh->nh_flags&RTNH_F_ONLINK) {
531                         struct net_device *dev;
532
533                         if (cfg->fc_scope >= RT_SCOPE_LINK)
534                                 return -EINVAL;
535                         if (inet_addr_type(cfg->fc_nlinfo.nl_net,
536                                            nh->nh_gw) != RTN_UNICAST)
537                                 return -EINVAL;
538                         if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
539                                                       nh->nh_oif)) == NULL)
540                                 return -ENODEV;
541                         if (!(dev->flags&IFF_UP))
542                                 return -ENETDOWN;
543                         nh->nh_dev = dev;
544                         dev_hold(dev);
545                         nh->nh_scope = RT_SCOPE_LINK;
546                         return 0;
547                 }
548                 {
549                         struct flowi fl = {
550                                 .nl_u = {
551                                         .ip4_u = {
552                                                 .daddr = nh->nh_gw,
553                                                 .scope = cfg->fc_scope + 1,
554                                         },
555                                 },
556                                 .oif = nh->nh_oif,
557                         };
558
559                         /* It is not necessary, but requires a bit of thinking */
560                         if (fl.fl4_scope < RT_SCOPE_LINK)
561                                 fl.fl4_scope = RT_SCOPE_LINK;
562                         if ((err = fib_lookup(&fl, &res)) != 0)
563                                 return err;
564                 }
565                 err = -EINVAL;
566                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567                         goto out;
568                 nh->nh_scope = res.scope;
569                 nh->nh_oif = FIB_RES_OIF(res);
570                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
571                         goto out;
572                 dev_hold(nh->nh_dev);
573                 err = -ENETDOWN;
574                 if (!(nh->nh_dev->flags & IFF_UP))
575                         goto out;
576                 err = 0;
577 out:
578                 fib_res_put(&res);
579                 return err;
580         } else {
581                 struct in_device *in_dev;
582
583                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
584                         return -EINVAL;
585
586                 in_dev = inetdev_by_index(nh->nh_oif);
587                 if (in_dev == NULL)
588                         return -ENODEV;
589                 if (!(in_dev->dev->flags&IFF_UP)) {
590                         in_dev_put(in_dev);
591                         return -ENETDOWN;
592                 }
593                 nh->nh_dev = in_dev->dev;
594                 dev_hold(nh->nh_dev);
595                 nh->nh_scope = RT_SCOPE_HOST;
596                 in_dev_put(in_dev);
597         }
598         return 0;
599 }
600
601 static inline unsigned int fib_laddr_hashfn(__be32 val)
602 {
603         unsigned int mask = (fib_hash_size - 1);
604
605         return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
606 }
607
608 static struct hlist_head *fib_hash_alloc(int bytes)
609 {
610         if (bytes <= PAGE_SIZE)
611                 return kzalloc(bytes, GFP_KERNEL);
612         else
613                 return (struct hlist_head *)
614                         __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
615 }
616
617 static void fib_hash_free(struct hlist_head *hash, int bytes)
618 {
619         if (!hash)
620                 return;
621
622         if (bytes <= PAGE_SIZE)
623                 kfree(hash);
624         else
625                 free_pages((unsigned long) hash, get_order(bytes));
626 }
627
628 static void fib_hash_move(struct hlist_head *new_info_hash,
629                           struct hlist_head *new_laddrhash,
630                           unsigned int new_size)
631 {
632         struct hlist_head *old_info_hash, *old_laddrhash;
633         unsigned int old_size = fib_hash_size;
634         unsigned int i, bytes;
635
636         spin_lock_bh(&fib_info_lock);
637         old_info_hash = fib_info_hash;
638         old_laddrhash = fib_info_laddrhash;
639         fib_hash_size = new_size;
640
641         for (i = 0; i < old_size; i++) {
642                 struct hlist_head *head = &fib_info_hash[i];
643                 struct hlist_node *node, *n;
644                 struct fib_info *fi;
645
646                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
647                         struct hlist_head *dest;
648                         unsigned int new_hash;
649
650                         hlist_del(&fi->fib_hash);
651
652                         new_hash = fib_info_hashfn(fi);
653                         dest = &new_info_hash[new_hash];
654                         hlist_add_head(&fi->fib_hash, dest);
655                 }
656         }
657         fib_info_hash = new_info_hash;
658
659         for (i = 0; i < old_size; i++) {
660                 struct hlist_head *lhead = &fib_info_laddrhash[i];
661                 struct hlist_node *node, *n;
662                 struct fib_info *fi;
663
664                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
665                         struct hlist_head *ldest;
666                         unsigned int new_hash;
667
668                         hlist_del(&fi->fib_lhash);
669
670                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
671                         ldest = &new_laddrhash[new_hash];
672                         hlist_add_head(&fi->fib_lhash, ldest);
673                 }
674         }
675         fib_info_laddrhash = new_laddrhash;
676
677         spin_unlock_bh(&fib_info_lock);
678
679         bytes = old_size * sizeof(struct hlist_head *);
680         fib_hash_free(old_info_hash, bytes);
681         fib_hash_free(old_laddrhash, bytes);
682 }
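/* Both tables are resized together: fib_create_info() doubles them once
 * fib_info_cnt reaches fib_hash_size, and because the info and laddr tables
 * are always allocated with the same size, a single 'bytes' value suffices
 * to free both old tables above.
 */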
683
684 struct fib_info *fib_create_info(struct fib_config *cfg)
685 {
686         int err;
687         struct fib_info *fi = NULL;
688         struct fib_info *ofi;
689         int nhs = 1;
690
691         /* Fast check to catch the most weird cases */
692         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
693                 goto err_inval;
694
695 #ifdef CONFIG_IP_ROUTE_MULTIPATH
696         if (cfg->fc_mp) {
697                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
698                 if (nhs == 0)
699                         goto err_inval;
700         }
701 #endif
702
703         err = -ENOBUFS;
704         if (fib_info_cnt >= fib_hash_size) {
705                 unsigned int new_size = fib_hash_size << 1;
706                 struct hlist_head *new_info_hash;
707                 struct hlist_head *new_laddrhash;
708                 unsigned int bytes;
709
710                 if (!new_size)
711                         new_size = 1;
712                 bytes = new_size * sizeof(struct hlist_head *);
713                 new_info_hash = fib_hash_alloc(bytes);
714                 new_laddrhash = fib_hash_alloc(bytes);
715                 if (!new_info_hash || !new_laddrhash) {
716                         fib_hash_free(new_info_hash, bytes);
717                         fib_hash_free(new_laddrhash, bytes);
718                 } else
719                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
720
721                 if (!fib_hash_size)
722                         goto failure;
723         }
724
725         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
726         if (fi == NULL)
727                 goto failure;
728         fib_info_cnt++;
729
730         fi->fib_protocol = cfg->fc_protocol;
731         fi->fib_flags = cfg->fc_flags;
732         fi->fib_priority = cfg->fc_priority;
733         fi->fib_prefsrc = cfg->fc_prefsrc;
734
735         fi->fib_nhs = nhs;
736         change_nexthops(fi) {
737                 nh->nh_parent = fi;
738         } endfor_nexthops(fi)
739
740         if (cfg->fc_mx) {
741                 struct nlattr *nla;
742                 int remaining;
743
744                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
745                         int type = nla_type(nla);
746
747                         if (type) {
748                                 if (type > RTAX_MAX)
749                                         goto err_inval;
750                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
751                         }
752                 }
753         }
754
755         if (cfg->fc_mp) {
756 #ifdef CONFIG_IP_ROUTE_MULTIPATH
757                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
758                 if (err != 0)
759                         goto failure;
760                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
761                         goto err_inval;
762                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
763                         goto err_inval;
764 #ifdef CONFIG_NET_CLS_ROUTE
765                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
766                         goto err_inval;
767 #endif
768 #else
769                 goto err_inval;
770 #endif
771         } else {
772                 struct fib_nh *nh = fi->fib_nh;
773
774                 nh->nh_oif = cfg->fc_oif;
775                 nh->nh_gw = cfg->fc_gw;
776                 nh->nh_flags = cfg->fc_flags;
777 #ifdef CONFIG_NET_CLS_ROUTE
778                 nh->nh_tclassid = cfg->fc_flow;
779 #endif
780 #ifdef CONFIG_IP_ROUTE_MULTIPATH
781                 nh->nh_weight = 1;
782 #endif
783         }
784
785         if (fib_props[cfg->fc_type].error) {
786                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
787                         goto err_inval;
788                 goto link_it;
789         }
790
791         if (cfg->fc_scope > RT_SCOPE_HOST)
792                 goto err_inval;
793
794         if (cfg->fc_scope == RT_SCOPE_HOST) {
795                 struct fib_nh *nh = fi->fib_nh;
796
797                 /* Local address is added. */
798                 if (nhs != 1 || nh->nh_gw)
799                         goto err_inval;
800                 nh->nh_scope = RT_SCOPE_NOWHERE;
801                 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
802                                               fi->fib_nh->nh_oif);
803                 err = -ENODEV;
804                 if (nh->nh_dev == NULL)
805                         goto failure;
806         } else {
807                 change_nexthops(fi) {
808                         if ((err = fib_check_nh(cfg, fi, nh)) != 0)
809                                 goto failure;
810                 } endfor_nexthops(fi)
811         }
812
813         if (fi->fib_prefsrc) {
814                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815                     fi->fib_prefsrc != cfg->fc_dst)
816                         if (inet_addr_type(cfg->fc_nlinfo.nl_net,
817                                            fi->fib_prefsrc) != RTN_LOCAL)
818                                 goto err_inval;
819         }
820
821 link_it:
822         if ((ofi = fib_find_info(fi)) != NULL) {
823                 fi->fib_dead = 1;
824                 free_fib_info(fi);
825                 ofi->fib_treeref++;
826                 return ofi;
827         }
828
829         fi->fib_treeref++;
830         atomic_inc(&fi->fib_clntref);
831         spin_lock_bh(&fib_info_lock);
832         hlist_add_head(&fi->fib_hash,
833                        &fib_info_hash[fib_info_hashfn(fi)]);
834         if (fi->fib_prefsrc) {
835                 struct hlist_head *head;
836
837                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
838                 hlist_add_head(&fi->fib_lhash, head);
839         }
840         change_nexthops(fi) {
841                 struct hlist_head *head;
842                 unsigned int hash;
843
844                 if (!nh->nh_dev)
845                         continue;
846                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
847                 head = &fib_info_devhash[hash];
848                 hlist_add_head(&nh->nh_hash, head);
849         } endfor_nexthops(fi)
850         spin_unlock_bh(&fib_info_lock);
851         return fi;
852
853 err_inval:
854         err = -EINVAL;
855
856 failure:
857         if (fi) {
858                 fi->fib_dead = 1;
859                 free_fib_info(fi);
860         }
861
862         return ERR_PTR(err);
863 }
864
865 /* Note! fib_semantic_match intentionally uses RCU list functions. */
866 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867                        struct fib_result *res, __be32 zone, __be32 mask,
868                         int prefixlen)
869 {
870         struct fib_alias *fa;
871         int nh_sel = 0;
872
873         list_for_each_entry_rcu(fa, head, fa_list) {
874                 int err;
875
876                 if (fa->fa_tos &&
877                     fa->fa_tos != flp->fl4_tos)
878                         continue;
879
880                 if (fa->fa_scope < flp->fl4_scope)
881                         continue;
882
883                 fa->fa_state |= FA_S_ACCESSED;
884
885                 err = fib_props[fa->fa_type].error;
886                 if (err == 0) {
887                         struct fib_info *fi = fa->fa_info;
888
889                         if (fi->fib_flags & RTNH_F_DEAD)
890                                 continue;
891
892                         switch (fa->fa_type) {
893                         case RTN_UNICAST:
894                         case RTN_LOCAL:
895                         case RTN_BROADCAST:
896                         case RTN_ANYCAST:
897                         case RTN_MULTICAST:
898                                 for_nexthops(fi) {
899                                         if (nh->nh_flags&RTNH_F_DEAD)
900                                                 continue;
901                                         if (!flp->oif || flp->oif == nh->nh_oif)
902                                                 break;
903                                 }
904 #ifdef CONFIG_IP_ROUTE_MULTIPATH
905                                 if (nhsel < fi->fib_nhs) {
906                                         nh_sel = nhsel;
907                                         goto out_fill_res;
908                                 }
909 #else
910                                 if (nhsel < 1) {
911                                         goto out_fill_res;
912                                 }
913 #endif
914                                 endfor_nexthops(fi);
915                                 continue;
916
917                         default:
918                                 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
919                                         fa->fa_type);
920                                 return -EINVAL;
921                         }
922                 }
923                 return err;
924         }
925         return 1;
926
927 out_fill_res:
928         res->prefixlen = prefixlen;
929         res->nh_sel = nh_sel;
930         res->type = fa->fa_type;
931         res->scope = fa->fa_scope;
932         res->fi = fa->fa_info;
933         atomic_inc(&res->fi->fib_clntref);
934         return 0;
935 }
936
937 /* Find an appropriate source address for this destination */
938
939 __be32 __fib_res_prefsrc(struct fib_result *res)
940 {
941         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
942 }
943
944 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
945                   u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
946                   struct fib_info *fi, unsigned int flags)
947 {
948         struct nlmsghdr *nlh;
949         struct rtmsg *rtm;
950
951         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
952         if (nlh == NULL)
953                 return -EMSGSIZE;
954
955         rtm = nlmsg_data(nlh);
956         rtm->rtm_family = AF_INET;
957         rtm->rtm_dst_len = dst_len;
958         rtm->rtm_src_len = 0;
959         rtm->rtm_tos = tos;
960         rtm->rtm_table = tb_id;
961         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
962         rtm->rtm_type = type;
963         rtm->rtm_flags = fi->fib_flags;
964         rtm->rtm_scope = scope;
965         rtm->rtm_protocol = fi->fib_protocol;
966
967         if (rtm->rtm_dst_len)
968                 NLA_PUT_BE32(skb, RTA_DST, dst);
969
970         if (fi->fib_priority)
971                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
972
973         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
974                 goto nla_put_failure;
975
976         if (fi->fib_prefsrc)
977                 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
978
979         if (fi->fib_nhs == 1) {
980                 if (fi->fib_nh->nh_gw)
981                         NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
982
983                 if (fi->fib_nh->nh_oif)
984                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
985 #ifdef CONFIG_NET_CLS_ROUTE
986                 if (fi->fib_nh[0].nh_tclassid)
987                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
988 #endif
989         }
990 #ifdef CONFIG_IP_ROUTE_MULTIPATH
991         if (fi->fib_nhs > 1) {
992                 struct rtnexthop *rtnh;
993                 struct nlattr *mp;
994
995                 mp = nla_nest_start(skb, RTA_MULTIPATH);
996                 if (mp == NULL)
997                         goto nla_put_failure;
998
999                 for_nexthops(fi) {
1000                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1001                         if (rtnh == NULL)
1002                                 goto nla_put_failure;
1003
1004                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1005                         rtnh->rtnh_hops = nh->nh_weight - 1;
1006                         rtnh->rtnh_ifindex = nh->nh_oif;
1007
1008                         if (nh->nh_gw)
1009                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1010 #ifdef CONFIG_NET_CLS_ROUTE
1011                         if (nh->nh_tclassid)
1012                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1013 #endif
1014                         /* length of rtnetlink header + attributes */
1015                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1016                 } endfor_nexthops(fi);
1017
1018                 nla_nest_end(skb, mp);
1019         }
1020 #endif
1021         return nlmsg_end(skb, nlh);
1022
1023 nla_put_failure:
1024         nlmsg_cancel(skb, nlh);
1025         return -EMSGSIZE;
1026 }
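/* Sketch of the message built above for a two-nexthop route (attribute order
 * as emitted; bracketed items are conditional):
 *
 *   nlmsghdr, rtmsg, RTA_TABLE, [RTA_DST], [RTA_PRIORITY], [RTA_METRICS],
 *   [RTA_PREFSRC], RTA_MULTIPATH {
 *           rtnexthop, [RTA_GATEWAY], [RTA_FLOW],
 *           rtnexthop, [RTA_GATEWAY], [RTA_FLOW]
 *   }
 */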
1027
1028 /*
1029    Update FIB if:
1030    - local address disappeared -> we must delete all the entries
1031      referring to it.
1032    - device went down -> we must shut down all nexthops going via it.
1033  */
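/* In fib_sync_down(), 'force' controls how aggressive the shutdown is:
 * with force == 0, nexthops whose scope is RT_SCOPE_NOWHERE (host-scope
 * routes) are spared; with force set, every nexthop on the device is marked
 * dead; and, on multipath kernels, force > 1 condemns the whole fib_info as
 * soon as one of its nexthops uses the device.
 */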
1034
1035 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1036 {
1037         int ret = 0;
1038         int scope = RT_SCOPE_NOWHERE;
1039
1040         if (force)
1041                 scope = -1;
1042
1043         if (local && fib_info_laddrhash) {
1044                 unsigned int hash = fib_laddr_hashfn(local);
1045                 struct hlist_head *head = &fib_info_laddrhash[hash];
1046                 struct hlist_node *node;
1047                 struct fib_info *fi;
1048
1049                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1050                         if (fi->fib_prefsrc == local) {
1051                                 fi->fib_flags |= RTNH_F_DEAD;
1052                                 ret++;
1053                         }
1054                 }
1055         }
1056
1057         if (dev) {
1058                 struct fib_info *prev_fi = NULL;
1059                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1060                 struct hlist_head *head = &fib_info_devhash[hash];
1061                 struct hlist_node *node;
1062                 struct fib_nh *nh;
1063
1064                 hlist_for_each_entry(nh, node, head, nh_hash) {
1065                         struct fib_info *fi = nh->nh_parent;
1066                         int dead;
1067
1068                         BUG_ON(!fi->fib_nhs);
1069                         if (nh->nh_dev != dev || fi == prev_fi)
1070                                 continue;
1071                         prev_fi = fi;
1072                         dead = 0;
1073                         change_nexthops(fi) {
1074                                 if (nh->nh_flags&RTNH_F_DEAD)
1075                                         dead++;
1076                                 else if (nh->nh_dev == dev &&
1077                                          nh->nh_scope != scope) {
1078                                         nh->nh_flags |= RTNH_F_DEAD;
1079 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1080                                         spin_lock_bh(&fib_multipath_lock);
1081                                         fi->fib_power -= nh->nh_power;
1082                                         nh->nh_power = 0;
1083                                         spin_unlock_bh(&fib_multipath_lock);
1084 #endif
1085                                         dead++;
1086                                 }
1087 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1088                                 if (force > 1 && nh->nh_dev == dev) {
1089                                         dead = fi->fib_nhs;
1090                                         break;
1091                                 }
1092 #endif
1093                         } endfor_nexthops(fi)
1094                         if (dead == fi->fib_nhs) {
1095                                 fi->fib_flags |= RTNH_F_DEAD;
1096                                 ret++;
1097                         }
1098                 }
1099         }
1100
1101         return ret;
1102 }
1103
1104 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1105
1106 /*
1107    A dead device goes up. We wake up its dead nexthops.
1108    This makes sense only for multipath routes.
1109  */
1110
1111 int fib_sync_up(struct net_device *dev)
1112 {
1113         struct fib_info *prev_fi;
1114         unsigned int hash;
1115         struct hlist_head *head;
1116         struct hlist_node *node;
1117         struct fib_nh *nh;
1118         int ret;
1119
1120         if (!(dev->flags&IFF_UP))
1121                 return 0;
1122
1123         prev_fi = NULL;
1124         hash = fib_devindex_hashfn(dev->ifindex);
1125         head = &fib_info_devhash[hash];
1126         ret = 0;
1127
1128         hlist_for_each_entry(nh, node, head, nh_hash) {
1129                 struct fib_info *fi = nh->nh_parent;
1130                 int alive;
1131
1132                 BUG_ON(!fi->fib_nhs);
1133                 if (nh->nh_dev != dev || fi == prev_fi)
1134                         continue;
1135
1136                 prev_fi = fi;
1137                 alive = 0;
1138                 change_nexthops(fi) {
1139                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1140                                 alive++;
1141                                 continue;
1142                         }
1143                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1144                                 continue;
1145                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1146                                 continue;
1147                         alive++;
1148                         spin_lock_bh(&fib_multipath_lock);
1149                         nh->nh_power = 0;
1150                         nh->nh_flags &= ~RTNH_F_DEAD;
1151                         spin_unlock_bh(&fib_multipath_lock);
1152                 } endfor_nexthops(fi)
1153
1154                 if (alive > 0) {
1155                         fi->fib_flags &= ~RTNH_F_DEAD;
1156                         ret++;
1157                 }
1158         }
1159
1160         return ret;
1161 }
1162
1163 /*
1164    The algorithm is suboptimal, but it provides really
1165    fair weighted route distribution.
1166  */
1167
1168 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1169 {
1170         struct fib_info *fi = res->fi;
1171         int w;
1172
1173         spin_lock_bh(&fib_multipath_lock);
1174         if (fi->fib_power <= 0) {
1175                 int power = 0;
1176                 change_nexthops(fi) {
1177                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1178                                 power += nh->nh_weight;
1179                                 nh->nh_power = nh->nh_weight;
1180                         }
1181                 } endfor_nexthops(fi);
1182                 fi->fib_power = power;
1183                 if (power <= 0) {
1184                         spin_unlock_bh(&fib_multipath_lock);
1185                         /* Race condition: route has just become dead. */
1186                         res->nh_sel = 0;
1187                         return;
1188                 }
1189         }
1190
1191
1192         /* w should be a random number in [0..fi->fib_power-1];
1193            jiffies gives only a pretty bad approximation of that.
1194          */
1195
1196         w = jiffies % fi->fib_power;
1197
1198         change_nexthops(fi) {
1199                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1200                         if ((w -= nh->nh_power) <= 0) {
1201                                 nh->nh_power--;
1202                                 fi->fib_power--;
1203                                 res->nh_sel = nhsel;
1204                                 spin_unlock_bh(&fib_multipath_lock);
1205                                 return;
1206                         }
1207                 }
1208         } endfor_nexthops(fi);
1209
1210         /* Race condition: route has just become dead. */
1211         res->nh_sel = 0;
1212         spin_unlock_bh(&fib_multipath_lock);
1213 }
1214 #endif
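/*
 * Illustration only: a self-contained userspace model of the weighted
 * selection performed by fib_select_multipath() above. The function below is
 * made up and is not part of the kernel; weight[] stands in for nh_weight,
 * power[] for nh_power, *total for fib_power and rnd for jiffies.
 *
 *      static int pick_nexthop(const int weight[], int power[], int n,
 *                              int *total, unsigned int rnd)
 *      {
 *              int i, w;
 *
 *              if (*total <= 0) {
 *                      *total = 0;
 *                      for (i = 0; i < n; i++) {
 *                              power[i] = weight[i];
 *                              *total += weight[i];
 *                      }
 *                      if (*total <= 0)
 *                              return 0;
 *              }
 *              w = rnd % *total;
 *              for (i = 0; i < n; i++) {
 *                      if (power[i] && (w -= power[i]) <= 0) {
 *                              power[i]--;
 *                              (*total)--;
 *                              return i;
 *                      }
 *              }
 *              return 0;
 *      }
 *
 * Over many calls with weights {3, 1}, index 0 comes back roughly three times
 * as often as index 1, which is the fair weighted distribution promised by
 * the comment above fib_select_multipath().
 */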