[IPV4]: no need to pass a pointer to a default into fib_detect_death
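fib_detect_death() only compares the default index, it never writes through it, so the value can be passed directly instead of through a pointer. A minimal before/after sketch of the prototype; the old form is inferred from the patch subject and is not shown in this listing:

    /* before (assumed): the default index arrived as a pointer */
    int fib_detect_death(struct fib_info *fi, int order,
                         struct fib_info **last_resort, int *last_idx,
                         int *dflt);

    /* after, as in the file below: the index is passed by value */
    int fib_detect_death(struct fib_info *fi, int order,
                         struct fib_info **last_resort, int *last_idx,
                         int dflt);

Callers that previously passed &dflt now pass the integer itself; presumably the only change inside the function is dropping the dereference in the order != dflt and order > dflt checks.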
[safe/jmp/linux-2.6] net/ipv4/fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
47
48 #include "fib_lookup.h"
49
50 #define FSprintk(a...)
51
52 static DEFINE_SPINLOCK(fib_info_lock);
53 static struct hlist_head *fib_info_hash;
54 static struct hlist_head *fib_info_laddrhash;
55 static unsigned int fib_hash_size;
56 static unsigned int fib_info_cnt;
57
58 #define DEVINDEX_HASHBITS 8
59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
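/* The for_nexthops()/change_nexthops() ... endfor_nexthops() macros below
 * iterate nh over the fib_nh[] array embedded in a fib_info (all fib_nhs
 * entries with CONFIG_IP_ROUTE_MULTIPATH, just the single entry otherwise),
 * with nhsel as the index.  The opening macro also opens a brace scope,
 * which is why every use must be closed with endfor_nexthops(fi).
 */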
62 #ifdef CONFIG_IP_ROUTE_MULTIPATH
63
64 static DEFINE_SPINLOCK(fib_multipath_lock);
65
66 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
67 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
68
69 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
70 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
71
72 #else /* CONFIG_IP_ROUTE_MULTIPATH */
73
74 /* Hope that gcc will optimize away the dummy loop */
75
76 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
77 for (nhsel=0; nhsel < 1; nhsel++)
78
79 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
80 for (nhsel=0; nhsel < 1; nhsel++)
81
82 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
83
84 #define endfor_nexthops(fi) }
85
86
87 static const struct
88 {
89         int     error;
90         u8      scope;
91 } fib_props[RTN_MAX + 1] = {
92         {
93                 .error  = 0,
94                 .scope  = RT_SCOPE_NOWHERE,
95         },      /* RTN_UNSPEC */
96         {
97                 .error  = 0,
98                 .scope  = RT_SCOPE_UNIVERSE,
99         },      /* RTN_UNICAST */
100         {
101                 .error  = 0,
102                 .scope  = RT_SCOPE_HOST,
103         },      /* RTN_LOCAL */
104         {
105                 .error  = 0,
106                 .scope  = RT_SCOPE_LINK,
107         },      /* RTN_BROADCAST */
108         {
109                 .error  = 0,
110                 .scope  = RT_SCOPE_LINK,
111         },      /* RTN_ANYCAST */
112         {
113                 .error  = 0,
114                 .scope  = RT_SCOPE_UNIVERSE,
115         },      /* RTN_MULTICAST */
116         {
117                 .error  = -EINVAL,
118                 .scope  = RT_SCOPE_UNIVERSE,
119         },      /* RTN_BLACKHOLE */
120         {
121                 .error  = -EHOSTUNREACH,
122                 .scope  = RT_SCOPE_UNIVERSE,
123         },      /* RTN_UNREACHABLE */
124         {
125                 .error  = -EACCES,
126                 .scope  = RT_SCOPE_UNIVERSE,
127         },      /* RTN_PROHIBIT */
128         {
129                 .error  = -EAGAIN,
130                 .scope  = RT_SCOPE_UNIVERSE,
131         },      /* RTN_THROW */
132         {
133                 .error  = -EINVAL,
134                 .scope  = RT_SCOPE_NOWHERE,
135         },      /* RTN_NAT */
136         {
137                 .error  = -EINVAL,
138                 .scope  = RT_SCOPE_NOWHERE,
139         },      /* RTN_XRESOLVE */
140 };
141
142
143 /* Release a nexthop info record */
144
145 void free_fib_info(struct fib_info *fi)
146 {
147         if (fi->fib_dead == 0) {
148                 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149                 return;
150         }
151         change_nexthops(fi) {
152                 if (nh->nh_dev)
153                         dev_put(nh->nh_dev);
154                 nh->nh_dev = NULL;
155         } endfor_nexthops(fi);
156         fib_info_cnt--;
157         kfree(fi);
158 }
159
160 void fib_release_info(struct fib_info *fi)
161 {
162         spin_lock_bh(&fib_info_lock);
163         if (fi && --fi->fib_treeref == 0) {
164                 hlist_del(&fi->fib_hash);
165                 if (fi->fib_prefsrc)
166                         hlist_del(&fi->fib_lhash);
167                 change_nexthops(fi) {
168                         if (!nh->nh_dev)
169                                 continue;
170                         hlist_del(&nh->nh_hash);
171                 } endfor_nexthops(fi)
172                 fi->fib_dead = 1;
173                 fib_info_put(fi);
174         }
175         spin_unlock_bh(&fib_info_lock);
176 }
177
178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179 {
180         const struct fib_nh *onh = ofi->fib_nh;
181
182         for_nexthops(fi) {
183                 if (nh->nh_oif != onh->nh_oif ||
184                     nh->nh_gw  != onh->nh_gw ||
185                     nh->nh_scope != onh->nh_scope ||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187                     nh->nh_weight != onh->nh_weight ||
188 #endif
189 #ifdef CONFIG_NET_CLS_ROUTE
190                     nh->nh_tclassid != onh->nh_tclassid ||
191 #endif
192                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193                         return -1;
194                 onh++;
195         } endfor_nexthops(fi);
196         return 0;
197 }
198
199 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200 {
201         unsigned int mask = (fib_hash_size - 1);
202         unsigned int val = fi->fib_nhs;
203
204         val ^= fi->fib_protocol;
205         val ^= (__force u32)fi->fib_prefsrc;
206         val ^= fi->fib_priority;
207
208         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209 }
210
211 static struct fib_info *fib_find_info(const struct fib_info *nfi)
212 {
213         struct hlist_head *head;
214         struct hlist_node *node;
215         struct fib_info *fi;
216         unsigned int hash;
217
218         hash = fib_info_hashfn(nfi);
219         head = &fib_info_hash[hash];
220
221         hlist_for_each_entry(fi, node, head, fib_hash) {
222                 if (fi->fib_nhs != nfi->fib_nhs)
223                         continue;
224                 if (nfi->fib_protocol == fi->fib_protocol &&
225                     nfi->fib_prefsrc == fi->fib_prefsrc &&
226                     nfi->fib_priority == fi->fib_priority &&
227                     memcmp(nfi->fib_metrics, fi->fib_metrics,
228                            sizeof(fi->fib_metrics)) == 0 &&
229                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231                         return fi;
232         }
233
234         return NULL;
235 }
236
237 static inline unsigned int fib_devindex_hashfn(unsigned int val)
238 {
239         unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241         return (val ^
242                 (val >> DEVINDEX_HASHBITS) ^
243                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244 }
245
246 /* Check that the gateway is already configured.
247    Used only by the redirect accept routine.
248  */
249
250 int ip_fib_check_default(__be32 gw, struct net_device *dev)
251 {
252         struct hlist_head *head;
253         struct hlist_node *node;
254         struct fib_nh *nh;
255         unsigned int hash;
256
257         spin_lock(&fib_info_lock);
258
259         hash = fib_devindex_hashfn(dev->ifindex);
260         head = &fib_info_devhash[hash];
261         hlist_for_each_entry(nh, node, head, nh_hash) {
262                 if (nh->nh_dev == dev &&
263                     nh->nh_gw == gw &&
264                     !(nh->nh_flags&RTNH_F_DEAD)) {
265                         spin_unlock(&fib_info_lock);
266                         return 0;
267                 }
268         }
269
270         spin_unlock(&fib_info_lock);
271
272         return -1;
273 }
274
275 static inline size_t fib_nlmsg_size(struct fib_info *fi)
276 {
277         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
278                          + nla_total_size(4) /* RTA_TABLE */
279                          + nla_total_size(4) /* RTA_DST */
280                          + nla_total_size(4) /* RTA_PRIORITY */
281                          + nla_total_size(4); /* RTA_PREFSRC */
282
283         /* space for nested metrics */
284         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
285
286         if (fi->fib_nhs) {
287                 /* Also handles the special case fib_nhs == 1 */
288
289                 /* each nexthop is packed in an attribute */
290                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
291
292                 /* may contain flow and gateway attribute */
293                 nhsize += 2 * nla_total_size(4);
294
295                 /* all nexthops are packed in a nested attribute */
296                 payload += nla_total_size(fi->fib_nhs * nhsize);
297         }
298
299         return payload;
300 }
301
302 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
303                int dst_len, u32 tb_id, struct nl_info *info,
304                unsigned int nlm_flags)
305 {
306         struct sk_buff *skb;
307         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
308         int err = -ENOBUFS;
309
310         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
311         if (skb == NULL)
312                 goto errout;
313
314         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
315                             fa->fa_type, fa->fa_scope, key, dst_len,
316                             fa->fa_tos, fa->fa_info, nlm_flags);
317         if (err < 0) {
318                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319                 WARN_ON(err == -EMSGSIZE);
320                 kfree_skb(skb);
321                 goto errout;
322         }
323         err = rtnl_notify(skb, &init_net, info->pid, RTNLGRP_IPV4_ROUTE,
324                           info->nlh, GFP_KERNEL);
325 errout:
326         if (err < 0)
327                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_ROUTE, err);
328 }
329
330 /* Return the first fib alias whose TOS is <= the given tos and,
331  * when the TOS matches exactly, whose priority is >= prio.
332  */
333 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334 {
335         if (fah) {
336                 struct fib_alias *fa;
337                 list_for_each_entry(fa, fah, fa_list) {
338                         if (fa->fa_tos > tos)
339                                 continue;
340                         if (fa->fa_info->fib_priority >= prio ||
341                             fa->fa_tos < tos)
342                                 return fa;
343                 }
344         }
345         return NULL;
346 }
347
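/* Default route selection helper: returns 0 when the neighbour (ARP) state of
 * the route's gateway says it is still usable (reachable, or valid and not the
 * current default at index dflt), and 1 when it should be treated as dead.  As
 * a side effect a fallback candidate may be recorded in *last_resort and
 * *last_idx.  The default index dflt is passed by value.
 */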
348 int fib_detect_death(struct fib_info *fi, int order,
349                      struct fib_info **last_resort, int *last_idx, int dflt)
350 {
351         struct neighbour *n;
352         int state = NUD_NONE;
353
354         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355         if (n) {
356                 state = n->nud_state;
357                 neigh_release(n);
358         }
359         if (state==NUD_REACHABLE)
360                 return 0;
361         if ((state&NUD_VALID) && order != dflt)
362                 return 0;
363         if ((state&NUD_VALID) ||
364             (*last_idx<0 && order > dflt)) {
365                 *last_resort = fi;
366                 *last_idx = order;
367         }
368         return 1;
369 }
370
371 #ifdef CONFIG_IP_ROUTE_MULTIPATH
372
373 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
374 {
375         int nhs = 0;
376
377         while (rtnh_ok(rtnh, remaining)) {
378                 nhs++;
379                 rtnh = rtnh_next(rtnh, &remaining);
380         }
381
382         /* leftover implies invalid nexthop configuration, discard it */
383         return remaining > 0 ? 0 : nhs;
384 }
385
386 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387                        int remaining, struct fib_config *cfg)
388 {
389         change_nexthops(fi) {
390                 int attrlen;
391
392                 if (!rtnh_ok(rtnh, remaining))
393                         return -EINVAL;
394
395                 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396                 nh->nh_oif = rtnh->rtnh_ifindex;
397                 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399                 attrlen = rtnh_attrlen(rtnh);
400                 if (attrlen > 0) {
401                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
404                         nh->nh_gw = nla ? nla_get_be32(nla) : 0;
405 #ifdef CONFIG_NET_CLS_ROUTE
406                         nla = nla_find(attrs, attrlen, RTA_FLOW);
407                         nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
408 #endif
409                 }
410
411                 rtnh = rtnh_next(rtnh, &remaining);
412         } endfor_nexthops(fi);
413
414         return 0;
415 }
416
417 #endif
418
419 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
420 {
421 #ifdef CONFIG_IP_ROUTE_MULTIPATH
422         struct rtnexthop *rtnh;
423         int remaining;
424 #endif
425
426         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
427                 return 1;
428
429         if (cfg->fc_oif || cfg->fc_gw) {
430                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
432                         return 0;
433                 return 1;
434         }
435
436 #ifdef CONFIG_IP_ROUTE_MULTIPATH
437         if (cfg->fc_mp == NULL)
438                 return 0;
439
440         rtnh = cfg->fc_mp;
441         remaining = cfg->fc_mp_len;
442
443         for_nexthops(fi) {
444                 int attrlen;
445
446                 if (!rtnh_ok(rtnh, remaining))
447                         return -EINVAL;
448
449                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
450                         return 1;
451
452                 attrlen = rtnh_attrlen(rtnh);
453                 if (attrlen > 0) {
454                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
457                         if (nla && nla_get_be32(nla) != nh->nh_gw)
458                                 return 1;
459 #ifdef CONFIG_NET_CLS_ROUTE
460                         nla = nla_find(attrs, attrlen, RTA_FLOW);
461                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
462                                 return 1;
463 #endif
464                 }
465
466                 rtnh = rtnh_next(rtnh, &remaining);
467         } endfor_nexthops(fi);
468 #endif
469         return 0;
470 }
471
472
473 /*
474    Picture
475    -------
476
477    Nexthop semantics are very messy for historical reasons.
478    We have to take into account that:
479    a) the gateway can actually be a local interface address,
480       so that a gatewayed route is direct.
481    b) the gateway must be an on-link address, possibly
482       described not by an ifaddr but by a direct route.
483    c) if both gateway and interface are specified, they must not
484       contradict each other.
485    d) with tunnel routes, the gateway may not be on-link.
486
487    Attempting to reconcile all of these (alas, self-contradictory) conditions
488    results in pretty ugly and hairy code with obscure logic.
489
490    I chose to generalize it instead, so that the amount
491    of code hardly increases, but it becomes
492    much more general.
493    Every prefix is assigned a "scope" value: "host" is a local address,
494    "link" is a direct route,
495    [ ... "site" ... "interior" ... ]
496    and "universe" is a true gateway route with global meaning.
497
498    Every prefix refers to a set of "nexthop"s (gw, oif),
499    where gw must have a narrower scope. This recursion stops
500    when gw has LOCAL scope or when the "nexthop" is declared ONLINK,
501    which forces gw to be on-link.
502
503    The code is still hairy, but now it is apparently logically
504    consistent and very flexible. For example, as a by-product it allows
505    independent exterior and interior
506    routing processes to coexist in peace.
507
508    Normally it looks like this:
509
510    {universe prefix}  -> (gw, oif) [scope link]
511                           |
512                           |-> {link prefix} -> (gw, oif) [scope local]
513                                                 |
514                                                 |-> {local prefix} (terminal node)
515  */
516
517 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518                         struct fib_nh *nh)
519 {
520         int err;
521
522         if (nh->nh_gw) {
523                 struct fib_result res;
524
525 #ifdef CONFIG_IP_ROUTE_PERVASIVE
526                 if (nh->nh_flags&RTNH_F_PERVASIVE)
527                         return 0;
528 #endif
529                 if (nh->nh_flags&RTNH_F_ONLINK) {
530                         struct net_device *dev;
531
532                         if (cfg->fc_scope >= RT_SCOPE_LINK)
533                                 return -EINVAL;
534                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
535                                 return -EINVAL;
536                         if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
537                                 return -ENODEV;
538                         if (!(dev->flags&IFF_UP))
539                                 return -ENETDOWN;
540                         nh->nh_dev = dev;
541                         dev_hold(dev);
542                         nh->nh_scope = RT_SCOPE_LINK;
543                         return 0;
544                 }
545                 {
546                         struct flowi fl = {
547                                 .nl_u = {
548                                         .ip4_u = {
549                                                 .daddr = nh->nh_gw,
550                                                 .scope = cfg->fc_scope + 1,
551                                         },
552                                 },
553                                 .oif = nh->nh_oif,
554                         };
555
556                         /* It is not necessary, but requires a bit of thinking */
557                         if (fl.fl4_scope < RT_SCOPE_LINK)
558                                 fl.fl4_scope = RT_SCOPE_LINK;
559                         if ((err = fib_lookup(&fl, &res)) != 0)
560                                 return err;
561                 }
562                 err = -EINVAL;
563                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564                         goto out;
565                 nh->nh_scope = res.scope;
566                 nh->nh_oif = FIB_RES_OIF(res);
567                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568                         goto out;
569                 dev_hold(nh->nh_dev);
570                 err = -ENETDOWN;
571                 if (!(nh->nh_dev->flags & IFF_UP))
572                         goto out;
573                 err = 0;
574 out:
575                 fib_res_put(&res);
576                 return err;
577         } else {
578                 struct in_device *in_dev;
579
580                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581                         return -EINVAL;
582
583                 in_dev = inetdev_by_index(nh->nh_oif);
584                 if (in_dev == NULL)
585                         return -ENODEV;
586                 if (!(in_dev->dev->flags&IFF_UP)) {
587                         in_dev_put(in_dev);
588                         return -ENETDOWN;
589                 }
590                 nh->nh_dev = in_dev->dev;
591                 dev_hold(nh->nh_dev);
592                 nh->nh_scope = RT_SCOPE_HOST;
593                 in_dev_put(in_dev);
594         }
595         return 0;
596 }
597
598 static inline unsigned int fib_laddr_hashfn(__be32 val)
599 {
600         unsigned int mask = (fib_hash_size - 1);
601
602         return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
603 }
604
605 static struct hlist_head *fib_hash_alloc(int bytes)
606 {
607         if (bytes <= PAGE_SIZE)
608                 return kzalloc(bytes, GFP_KERNEL);
609         else
610                 return (struct hlist_head *)
611                         __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
612 }
613
614 static void fib_hash_free(struct hlist_head *hash, int bytes)
615 {
616         if (!hash)
617                 return;
618
619         if (bytes <= PAGE_SIZE)
620                 kfree(hash);
621         else
622                 free_pages((unsigned long) hash, get_order(bytes));
623 }
624
625 static void fib_hash_move(struct hlist_head *new_info_hash,
626                           struct hlist_head *new_laddrhash,
627                           unsigned int new_size)
628 {
629         struct hlist_head *old_info_hash, *old_laddrhash;
630         unsigned int old_size = fib_hash_size;
631         unsigned int i, bytes;
632
633         spin_lock_bh(&fib_info_lock);
634         old_info_hash = fib_info_hash;
635         old_laddrhash = fib_info_laddrhash;
636         fib_hash_size = new_size;
637
638         for (i = 0; i < old_size; i++) {
639                 struct hlist_head *head = &fib_info_hash[i];
640                 struct hlist_node *node, *n;
641                 struct fib_info *fi;
642
643                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644                         struct hlist_head *dest;
645                         unsigned int new_hash;
646
647                         hlist_del(&fi->fib_hash);
648
649                         new_hash = fib_info_hashfn(fi);
650                         dest = &new_info_hash[new_hash];
651                         hlist_add_head(&fi->fib_hash, dest);
652                 }
653         }
654         fib_info_hash = new_info_hash;
655
656         for (i = 0; i < old_size; i++) {
657                 struct hlist_head *lhead = &fib_info_laddrhash[i];
658                 struct hlist_node *node, *n;
659                 struct fib_info *fi;
660
661                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662                         struct hlist_head *ldest;
663                         unsigned int new_hash;
664
665                         hlist_del(&fi->fib_lhash);
666
667                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668                         ldest = &new_laddrhash[new_hash];
669                         hlist_add_head(&fi->fib_lhash, ldest);
670                 }
671         }
672         fib_info_laddrhash = new_laddrhash;
673
674         spin_unlock_bh(&fib_info_lock);
675
676         bytes = old_size * sizeof(struct hlist_head *);
677         fib_hash_free(old_info_hash, bytes);
678         fib_hash_free(old_laddrhash, bytes);
679 }
680
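/* Build a fib_info (with its embedded fib_nh[] array) from a netlink route
 * config, validate and resolve the nexthops, and either reuse an existing
 * identical entry from the info hash or link the new one into the hash tables.
 */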
681 struct fib_info *fib_create_info(struct fib_config *cfg)
682 {
683         int err;
684         struct fib_info *fi = NULL;
685         struct fib_info *ofi;
686         int nhs = 1;
687
688         /* Fast check to catch the most weird cases */
689         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
690                 goto err_inval;
691
692 #ifdef CONFIG_IP_ROUTE_MULTIPATH
693         if (cfg->fc_mp) {
694                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
695                 if (nhs == 0)
696                         goto err_inval;
697         }
698 #endif
699
700         err = -ENOBUFS;
701         if (fib_info_cnt >= fib_hash_size) {
702                 unsigned int new_size = fib_hash_size << 1;
703                 struct hlist_head *new_info_hash;
704                 struct hlist_head *new_laddrhash;
705                 unsigned int bytes;
706
707                 if (!new_size)
708                         new_size = 1;
709                 bytes = new_size * sizeof(struct hlist_head *);
710                 new_info_hash = fib_hash_alloc(bytes);
711                 new_laddrhash = fib_hash_alloc(bytes);
712                 if (!new_info_hash || !new_laddrhash) {
713                         fib_hash_free(new_info_hash, bytes);
714                         fib_hash_free(new_laddrhash, bytes);
715                 } else
716                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
717
718                 if (!fib_hash_size)
719                         goto failure;
720         }
721
722         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
723         if (fi == NULL)
724                 goto failure;
725         fib_info_cnt++;
726
727         fi->fib_protocol = cfg->fc_protocol;
728         fi->fib_flags = cfg->fc_flags;
729         fi->fib_priority = cfg->fc_priority;
730         fi->fib_prefsrc = cfg->fc_prefsrc;
731
732         fi->fib_nhs = nhs;
733         change_nexthops(fi) {
734                 nh->nh_parent = fi;
735         } endfor_nexthops(fi)
736
737         if (cfg->fc_mx) {
738                 struct nlattr *nla;
739                 int remaining;
740
741                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
742                         int type = nla_type(nla);
743
744                         if (type) {
745                                 if (type > RTAX_MAX)
746                                         goto err_inval;
747                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
748                         }
749                 }
750         }
751
752         if (cfg->fc_mp) {
753 #ifdef CONFIG_IP_ROUTE_MULTIPATH
754                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
755                 if (err != 0)
756                         goto failure;
757                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
758                         goto err_inval;
759                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
760                         goto err_inval;
761 #ifdef CONFIG_NET_CLS_ROUTE
762                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
763                         goto err_inval;
764 #endif
765 #else
766                 goto err_inval;
767 #endif
768         } else {
769                 struct fib_nh *nh = fi->fib_nh;
770
771                 nh->nh_oif = cfg->fc_oif;
772                 nh->nh_gw = cfg->fc_gw;
773                 nh->nh_flags = cfg->fc_flags;
774 #ifdef CONFIG_NET_CLS_ROUTE
775                 nh->nh_tclassid = cfg->fc_flow;
776 #endif
777 #ifdef CONFIG_IP_ROUTE_MULTIPATH
778                 nh->nh_weight = 1;
779 #endif
780         }
781
782         if (fib_props[cfg->fc_type].error) {
783                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
784                         goto err_inval;
785                 goto link_it;
786         }
787
788         if (cfg->fc_scope > RT_SCOPE_HOST)
789                 goto err_inval;
790
791         if (cfg->fc_scope == RT_SCOPE_HOST) {
792                 struct fib_nh *nh = fi->fib_nh;
793
794                 /* Local address is added. */
795                 if (nhs != 1 || nh->nh_gw)
796                         goto err_inval;
797                 nh->nh_scope = RT_SCOPE_NOWHERE;
798                 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
799                 err = -ENODEV;
800                 if (nh->nh_dev == NULL)
801                         goto failure;
802         } else {
803                 change_nexthops(fi) {
804                         if ((err = fib_check_nh(cfg, fi, nh)) != 0)
805                                 goto failure;
806                 } endfor_nexthops(fi)
807         }
808
809         if (fi->fib_prefsrc) {
810                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
811                     fi->fib_prefsrc != cfg->fc_dst)
812                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
813                                 goto err_inval;
814         }
815
816 link_it:
817         if ((ofi = fib_find_info(fi)) != NULL) {
818                 fi->fib_dead = 1;
819                 free_fib_info(fi);
820                 ofi->fib_treeref++;
821                 return ofi;
822         }
823
824         fi->fib_treeref++;
825         atomic_inc(&fi->fib_clntref);
826         spin_lock_bh(&fib_info_lock);
827         hlist_add_head(&fi->fib_hash,
828                        &fib_info_hash[fib_info_hashfn(fi)]);
829         if (fi->fib_prefsrc) {
830                 struct hlist_head *head;
831
832                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
833                 hlist_add_head(&fi->fib_lhash, head);
834         }
835         change_nexthops(fi) {
836                 struct hlist_head *head;
837                 unsigned int hash;
838
839                 if (!nh->nh_dev)
840                         continue;
841                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
842                 head = &fib_info_devhash[hash];
843                 hlist_add_head(&nh->nh_hash, head);
844         } endfor_nexthops(fi)
845         spin_unlock_bh(&fib_info_lock);
846         return fi;
847
848 err_inval:
849         err = -EINVAL;
850
851 failure:
852         if (fi) {
853                 fi->fib_dead = 1;
854                 free_fib_info(fi);
855         }
856
857         return ERR_PTR(err);
858 }
859
860 /* Note! fib_semantic_match intentionally uses RCU list functions. */
861 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
862                        struct fib_result *res, __be32 zone, __be32 mask,
863                         int prefixlen)
864 {
865         struct fib_alias *fa;
866         int nh_sel = 0;
867
868         list_for_each_entry_rcu(fa, head, fa_list) {
869                 int err;
870
871                 if (fa->fa_tos &&
872                     fa->fa_tos != flp->fl4_tos)
873                         continue;
874
875                 if (fa->fa_scope < flp->fl4_scope)
876                         continue;
877
878                 fa->fa_state |= FA_S_ACCESSED;
879
880                 err = fib_props[fa->fa_type].error;
881                 if (err == 0) {
882                         struct fib_info *fi = fa->fa_info;
883
884                         if (fi->fib_flags & RTNH_F_DEAD)
885                                 continue;
886
887                         switch (fa->fa_type) {
888                         case RTN_UNICAST:
889                         case RTN_LOCAL:
890                         case RTN_BROADCAST:
891                         case RTN_ANYCAST:
892                         case RTN_MULTICAST:
893                                 for_nexthops(fi) {
894                                         if (nh->nh_flags&RTNH_F_DEAD)
895                                                 continue;
896                                         if (!flp->oif || flp->oif == nh->nh_oif)
897                                                 break;
898                                 }
899 #ifdef CONFIG_IP_ROUTE_MULTIPATH
900                                 if (nhsel < fi->fib_nhs) {
901                                         nh_sel = nhsel;
902                                         goto out_fill_res;
903                                 }
904 #else
905                                 if (nhsel < 1) {
906                                         goto out_fill_res;
907                                 }
908 #endif
909                                 endfor_nexthops(fi);
910                                 continue;
911
912                         default:
913                                 printk(KERN_DEBUG "impossible 102\n");
914                                 return -EINVAL;
915                         }
916                 }
917                 return err;
918         }
919         return 1;
920
921 out_fill_res:
922         res->prefixlen = prefixlen;
923         res->nh_sel = nh_sel;
924         res->type = fa->fa_type;
925         res->scope = fa->fa_scope;
926         res->fi = fa->fa_info;
927         atomic_inc(&res->fi->fib_clntref);
928         return 0;
929 }
930
931 /* Find an appropriate source address for this destination */
932
933 __be32 __fib_res_prefsrc(struct fib_result *res)
934 {
935         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
936 }
937
938 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
939                   u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
940                   struct fib_info *fi, unsigned int flags)
941 {
942         struct nlmsghdr *nlh;
943         struct rtmsg *rtm;
944
945         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
946         if (nlh == NULL)
947                 return -EMSGSIZE;
948
949         rtm = nlmsg_data(nlh);
950         rtm->rtm_family = AF_INET;
951         rtm->rtm_dst_len = dst_len;
952         rtm->rtm_src_len = 0;
953         rtm->rtm_tos = tos;
954         rtm->rtm_table = tb_id;
955         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
956         rtm->rtm_type = type;
957         rtm->rtm_flags = fi->fib_flags;
958         rtm->rtm_scope = scope;
959         rtm->rtm_protocol = fi->fib_protocol;
960
961         if (rtm->rtm_dst_len)
962                 NLA_PUT_BE32(skb, RTA_DST, dst);
963
964         if (fi->fib_priority)
965                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
966
967         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
968                 goto nla_put_failure;
969
970         if (fi->fib_prefsrc)
971                 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
972
973         if (fi->fib_nhs == 1) {
974                 if (fi->fib_nh->nh_gw)
975                         NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
976
977                 if (fi->fib_nh->nh_oif)
978                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
979 #ifdef CONFIG_NET_CLS_ROUTE
980                 if (fi->fib_nh[0].nh_tclassid)
981                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
982 #endif
983         }
984 #ifdef CONFIG_IP_ROUTE_MULTIPATH
985         if (fi->fib_nhs > 1) {
986                 struct rtnexthop *rtnh;
987                 struct nlattr *mp;
988
989                 mp = nla_nest_start(skb, RTA_MULTIPATH);
990                 if (mp == NULL)
991                         goto nla_put_failure;
992
993                 for_nexthops(fi) {
994                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
995                         if (rtnh == NULL)
996                                 goto nla_put_failure;
997
998                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
999                         rtnh->rtnh_hops = nh->nh_weight - 1;
1000                         rtnh->rtnh_ifindex = nh->nh_oif;
1001
1002                         if (nh->nh_gw)
1003                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1004 #ifdef CONFIG_NET_CLS_ROUTE
1005                         if (nh->nh_tclassid)
1006                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1007 #endif
1008                         /* length of rtnetlink header + attributes */
1009                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1010                 } endfor_nexthops(fi);
1011
1012                 nla_nest_end(skb, mp);
1013         }
1014 #endif
1015         return nlmsg_end(skb, nlh);
1016
1017 nla_put_failure:
1018         nlmsg_cancel(skb, nlh);
1019         return -EMSGSIZE;
1020 }
1021
1022 /*
1023    Update the FIB if:
1024    - a local address disappeared -> we must delete all the entries
1025      referring to it.
1026    - a device went down -> we must shut down all nexthops going via it.
1027  */
1028
1029 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1030 {
1031         int ret = 0;
1032         int scope = RT_SCOPE_NOWHERE;
1033
1034         if (force)
1035                 scope = -1;
1036
1037         if (local && fib_info_laddrhash) {
1038                 unsigned int hash = fib_laddr_hashfn(local);
1039                 struct hlist_head *head = &fib_info_laddrhash[hash];
1040                 struct hlist_node *node;
1041                 struct fib_info *fi;
1042
1043                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1044                         if (fi->fib_prefsrc == local) {
1045                                 fi->fib_flags |= RTNH_F_DEAD;
1046                                 ret++;
1047                         }
1048                 }
1049         }
1050
1051         if (dev) {
1052                 struct fib_info *prev_fi = NULL;
1053                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1054                 struct hlist_head *head = &fib_info_devhash[hash];
1055                 struct hlist_node *node;
1056                 struct fib_nh *nh;
1057
1058                 hlist_for_each_entry(nh, node, head, nh_hash) {
1059                         struct fib_info *fi = nh->nh_parent;
1060                         int dead;
1061
1062                         BUG_ON(!fi->fib_nhs);
1063                         if (nh->nh_dev != dev || fi == prev_fi)
1064                                 continue;
1065                         prev_fi = fi;
1066                         dead = 0;
1067                         change_nexthops(fi) {
1068                                 if (nh->nh_flags&RTNH_F_DEAD)
1069                                         dead++;
1070                                 else if (nh->nh_dev == dev &&
1071                                          nh->nh_scope != scope) {
1072                                         nh->nh_flags |= RTNH_F_DEAD;
1073 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1074                                         spin_lock_bh(&fib_multipath_lock);
1075                                         fi->fib_power -= nh->nh_power;
1076                                         nh->nh_power = 0;
1077                                         spin_unlock_bh(&fib_multipath_lock);
1078 #endif
1079                                         dead++;
1080                                 }
1081 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1082                                 if (force > 1 && nh->nh_dev == dev) {
1083                                         dead = fi->fib_nhs;
1084                                         break;
1085                                 }
1086 #endif
1087                         } endfor_nexthops(fi)
1088                         if (dead == fi->fib_nhs) {
1089                                 fi->fib_flags |= RTNH_F_DEAD;
1090                                 ret++;
1091                         }
1092                 }
1093         }
1094
1095         return ret;
1096 }
1097
1098 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1099
1100 /*
1101    A dead device goes up: we wake up its dead nexthops.
1102    This makes sense only for multipath routes.
1103  */
1104
1105 int fib_sync_up(struct net_device *dev)
1106 {
1107         struct fib_info *prev_fi;
1108         unsigned int hash;
1109         struct hlist_head *head;
1110         struct hlist_node *node;
1111         struct fib_nh *nh;
1112         int ret;
1113
1114         if (!(dev->flags&IFF_UP))
1115                 return 0;
1116
1117         prev_fi = NULL;
1118         hash = fib_devindex_hashfn(dev->ifindex);
1119         head = &fib_info_devhash[hash];
1120         ret = 0;
1121
1122         hlist_for_each_entry(nh, node, head, nh_hash) {
1123                 struct fib_info *fi = nh->nh_parent;
1124                 int alive;
1125
1126                 BUG_ON(!fi->fib_nhs);
1127                 if (nh->nh_dev != dev || fi == prev_fi)
1128                         continue;
1129
1130                 prev_fi = fi;
1131                 alive = 0;
1132                 change_nexthops(fi) {
1133                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1134                                 alive++;
1135                                 continue;
1136                         }
1137                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1138                                 continue;
1139                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1140                                 continue;
1141                         alive++;
1142                         spin_lock_bh(&fib_multipath_lock);
1143                         nh->nh_power = 0;
1144                         nh->nh_flags &= ~RTNH_F_DEAD;
1145                         spin_unlock_bh(&fib_multipath_lock);
1146                 } endfor_nexthops(fi)
1147
1148                 if (alive > 0) {
1149                         fi->fib_flags &= ~RTNH_F_DEAD;
1150                         ret++;
1151                 }
1152         }
1153
1154         return ret;
1155 }
1156
1157 /*
1158    The algorithm is suboptimal, but it provides really
1159    fair weighted route distribution.
1160  */
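/* For example: with two live nexthops of weight 2 and 1, fib_power is
 * replenished to 3 and the nh_power budgets to 2 and 1.  Each pick decrements
 * the chosen nexthop's nh_power and fib_power by one, and nexthops with an
 * exhausted budget are skipped, so over one three-pick refill cycle the
 * weight-2 nexthop is chosen twice and the weight-1 nexthop once.
 */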
1161
1162 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1163 {
1164         struct fib_info *fi = res->fi;
1165         int w;
1166
1167         spin_lock_bh(&fib_multipath_lock);
1168         if (fi->fib_power <= 0) {
1169                 int power = 0;
1170                 change_nexthops(fi) {
1171                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1172                                 power += nh->nh_weight;
1173                                 nh->nh_power = nh->nh_weight;
1174                         }
1175                 } endfor_nexthops(fi);
1176                 fi->fib_power = power;
1177                 if (power <= 0) {
1178                         spin_unlock_bh(&fib_multipath_lock);
1179                         /* Race condition: route has just become dead. */
1180                         res->nh_sel = 0;
1181                         return;
1182                 }
1183         }
1184
1185
1186         /* w should be a random number in [0..fi->fib_power-1];
1187            jiffies is a pretty bad approximation of that.
1188          */
1189
1190         w = jiffies % fi->fib_power;
1191
1192         change_nexthops(fi) {
1193                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1194                         if ((w -= nh->nh_power) <= 0) {
1195                                 nh->nh_power--;
1196                                 fi->fib_power--;
1197                                 res->nh_sel = nhsel;
1198                                 spin_unlock_bh(&fib_multipath_lock);
1199                                 return;
1200                         }
1201                 }
1202         } endfor_nexthops(fi);
1203
1204         /* Race condition: route has just become dead. */
1205         res->nh_sel = 0;
1206         spin_unlock_bh(&fib_multipath_lock);
1207 }
1208 #endif