[IPV6]: ROUTE: Eliminate lock for default route pointer.
author YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Tue, 21 Mar 2006 01:00:26 +0000 (17:00 -0800)
committer David S. Miller <davem@davemloft.net>
Tue, 21 Mar 2006 01:00:26 +0000 (17:00 -0800)
And prepare for more advanced router selection.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip6_route.h
net/ipv6/ip6_fib.c
net/ipv6/route.c

index 1f2e428..01acca0 100644 (file)
@@ -91,8 +91,6 @@ extern struct rt6_info *      rt6_add_dflt_router(struct in6_addr *gwaddr,
 
 extern void                    rt6_purge_dflt_routers(void);
 
-extern void                    rt6_reset_dflt_pointer(struct rt6_info *rt);
-
 extern void                    rt6_redirect(struct in6_addr *dest,
                                             struct in6_addr *saddr,
                                             struct neighbour *neigh,
index 1bf6d9a..2cb6149 100644 (file)
@@ -1105,7 +1105,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
        if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
                if (time_after(now, rt->rt6i_expires)) {
                        RT6_TRACE("expiring %p\n", rt);
-                       rt6_reset_dflt_pointer(rt);
                        return -1;
                }
                gc_args.more++;
index 6a4019a..f71e236 100644 (file)
@@ -74,6 +74,9 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
+#define RT6_SELECT_F_IFACE     0x1     /* rt6_score_route() rejects a route whose device check scores 0 */
+#define RT6_SELECT_F_REACHABLE 0x2     /* rt6_score_route() rejects a route whose neighbour check fails */
+
 static int ip6_rt_max_size = 4096;
 static int ip6_rt_gc_min_interval = HZ / 2;
 static int ip6_rt_gc_timeout = 60*HZ;
@@ -216,148 +219,89 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
 }
 
 /*
- *     pointer to the last default router chosen. BH is disabled locally.
+ * Default Router Selection (RFC 2461 6.3.6)
  */
-static struct rt6_info *rt6_dflt_pointer;
-static DEFINE_SPINLOCK(rt6_dflt_lock);
+static int inline rt6_check_dev(struct rt6_info *rt, int oif) /* score rt's device against the requested interface index oif: 2, 1 or 0 */
+{
+       struct net_device *dev = rt->rt6i_dev;
+       if (!oif || dev->ifindex == oif)
+               return 2;       /* no interface requested, or rt's device is the requested one */
+       if ((dev->flags & IFF_LOOPBACK) &&
+           rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+               return 1;       /* rt's device is loopback, but its rt6i_idev device is the requested one */
+       return 0;       /* no match */
+}
 
-void rt6_reset_dflt_pointer(struct rt6_info *rt)
+static int inline rt6_check_neigh(struct rt6_info *rt) /* 1 if rt's nexthop neighbour has any NUD_VALID state bit set, else 0 */
 {
-       spin_lock_bh(&rt6_dflt_lock);
-       if (rt == NULL || rt == rt6_dflt_pointer) {
-               RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
-               rt6_dflt_pointer = NULL;
+       struct neighbour *neigh = rt->rt6i_nexthop;
+       int m = 0;      /* stays 0 when the route has no nexthop neighbour */
+       if (neigh) {
+               read_lock_bh(&neigh->lock);     /* nud_state is sampled under the neighbour's read lock */
+               if (neigh->nud_state & NUD_VALID)
+                       m = 1;
+               read_unlock_bh(&neigh->lock);
        }
-       spin_unlock_bh(&rt6_dflt_lock);
+       return m;
 }
 
-/* Default Router Selection (RFC 2461 6.3.6) */
-static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
+static int rt6_score_route(struct rt6_info *rt, int oif,
+                          int strict) /* returns -1 if rt is rejected by a strict flag, else device score (0..2) with bit 4 set when the neighbour check passes */
 {
-       struct rt6_info *match = NULL;
-       struct rt6_info *sprt;
-       int mpri = 0;
+       int m = rt6_check_dev(rt, oif); /* 0, 1 or 2 */
+       if (!m && (strict & RT6_SELECT_F_IFACE))
+               return -1;      /* device does not match and an interface match is mandatory */
+       if (rt6_check_neigh(rt))
+               m |= 4; /* neighbour check outweighs any device score */
+       else if (strict & RT6_SELECT_F_REACHABLE)
+               return -1;      /* neighbour check failed and it is mandatory */
+       return m;
+}
 
-       for (sprt = rt; sprt; sprt = sprt->u.next) {
-               struct neighbour *neigh;
-               int m = 0;
+static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
+                                  int strict) /* pick the best-scoring route among the entries at *head that share the first entry's metric; &ip6_null_entry if none qualifies */
+{
+       struct rt6_info *match = NULL, *last = NULL;
+       struct rt6_info *rt, *rt0 = *head;      /* NOTE(review): head is dereferenced here, before the NULL test in the trace below - confirm callers never pass NULL */
+       u32 metric;
+       int mpri = -1;  /* best score so far; -1 means nothing accepted yet */
 
-               if (!oif ||
-                   (sprt->rt6i_dev &&
-                    sprt->rt6i_dev->ifindex == oif))
-                       m += 8;
+       RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
+                 __FUNCTION__, head, head ? *head : NULL, oif);
 
-               if (rt6_check_expired(sprt))
+       for (rt = rt0, metric = rt0->rt6i_metric;       /* NOTE(review): rt0 is dereferenced unchecked - confirm the list is never empty here */
+            rt && rt->rt6i_metric == metric;   /* stop at the first route with a different metric */
+            rt = rt->u.next) {
+               int m;
+
+               if (rt6_check_expired(rt))      /* expired routes are skipped and never become 'last' */
                        continue;
 
-               if (sprt == rt6_dflt_pointer)
-                       m += 4;
-
-               if ((neigh = sprt->rt6i_nexthop) != NULL) {
-                       read_lock_bh(&neigh->lock);
-                       switch (neigh->nud_state) {
-                       case NUD_REACHABLE:
-                               m += 3;
-                               break;
-
-                       case NUD_STALE:
-                       case NUD_DELAY:
-                       case NUD_PROBE:
-                               m += 2;
-                               break;
-
-                       case NUD_NOARP:
-                       case NUD_PERMANENT:
-                               m += 1;
-                               break;
-
-                       case NUD_INCOMPLETE:
-                       default:
-                               read_unlock_bh(&neigh->lock);
-                               continue;
-                       }
-                       read_unlock_bh(&neigh->lock);
-               } else {
+               last = rt;      /* last non-expired route seen in this metric group */
+
+               m = rt6_score_route(rt, oif, strict);
+               if (m < 0)      /* rejected by one of the strict flags */
                        continue;
-               }
 
-               if (m > mpri || m >= 12) {
-                       match = sprt;
+               if (m > mpri) { /* strictly greater: the earliest route wins ties */
+                       match = rt;
                        mpri = m;
-                       if (m >= 12) {
-                               /* we choose the last default router if it
-                                * is in (probably) reachable state.
-                                * If route changed, we should do pmtu
-                                * discovery. --yoshfuji
-                                */
-                               break;
-                       }
                }
        }
 
-       spin_lock(&rt6_dflt_lock);
-       if (!match) {
-               /*
-                *      No default routers are known to be reachable.
-                *      SHOULD round robin
-                */
-               if (rt6_dflt_pointer) {
-                       for (sprt = rt6_dflt_pointer->u.next;
-                            sprt; sprt = sprt->u.next) {
-                               if (sprt->u.dst.obsolete <= 0 &&
-                                   sprt->u.dst.error == 0 &&
-                                   !rt6_check_expired(sprt)) {
-                                       match = sprt;
-                                       break;
-                               }
-                       }
-                       for (sprt = rt;
-                            !match && sprt;
-                            sprt = sprt->u.next) {
-                               if (sprt->u.dst.obsolete <= 0 &&
-                                   sprt->u.dst.error == 0 &&
-                                   !rt6_check_expired(sprt)) {
-                                       match = sprt;
-                                       break;
-                               }
-                               if (sprt == rt6_dflt_pointer)
-                                       break;
-                       }
-               }
-       }
-
-       if (match) {
-               if (rt6_dflt_pointer != match)
-                       RT6_TRACE("changed default router: %p->%p\n",
-                                 rt6_dflt_pointer, match);
-               rt6_dflt_pointer = match;
+       if (!match &&
+           (strict & RT6_SELECT_F_REACHABLE) &&
+           last && last != rt0) {
+               /* no entries matched; do round-robin: unlink rt0 from the front and relink it right after 'last' */
+               *head = rt0->u.next;    /* NOTE(review): writes the caller's list head; the call site in this patch appears to run under read_lock_bh(&rt6_lock) only - confirm this is safe */
+               rt0->u.next = last->u.next;
+               last->u.next = rt0;
        }
-       spin_unlock(&rt6_dflt_lock);
 
-       if (!match) {
-               /*
-                * Last Resort: if no default routers found, 
-                * use addrconf default route.
-                * We don't record this route.
-                */
-               for (sprt = ip6_routing_table.leaf;
-                    sprt; sprt = sprt->u.next) {
-                       if (!rt6_check_expired(sprt) &&
-                           (sprt->rt6i_flags & RTF_DEFAULT) &&
-                           (!oif ||
-                            (sprt->rt6i_dev &&
-                             sprt->rt6i_dev->ifindex == oif))) {
-                               match = sprt;
-                               break;
-                       }
-               }
-               if (!match) {
-                       /* no default route.  give up. */
-                       match = &ip6_null_entry;
-               }
-       }
+       RT6_TRACE("%s() => %p, score=%d\n",
+                 __FUNCTION__, match, mpri);
 
-       return match;
+       return (match ? match : &ip6_null_entry);       /* never returns NULL */
 }
 
 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
@@ -542,7 +486,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
        int attempts = 3;
        int err;
 
-       strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+       strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
 
 relookup:
        read_lock_bh(&rt6_lock);
@@ -558,8 +502,9 @@ restart:
                goto out;
        }
        if (rt->rt6i_flags & RTF_DEFAULT) {
-               if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
-                       rt = rt6_best_dflt(rt, fl->oif);
+               rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
+               if (rt == &ip6_null_entry)
+                       rt = rt6_select(&fn->leaf, fl->oif, strict);
        } else {
                rt = rt6_device_match(rt, fl->oif, strict);
                BACKTRACK();
@@ -1025,8 +970,6 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct
 
        write_lock_bh(&rt6_lock);
 
-       rt6_reset_dflt_pointer(NULL);
-
        err = fib6_del(rt, nlh, _rtattr, req);
        dst_release(&rt->u.dst);
 
@@ -1341,8 +1284,6 @@ restart:
                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
                        dst_hold(&rt->u.dst);
 
-                       rt6_reset_dflt_pointer(NULL);
-
                        read_unlock_bh(&rt6_lock);
 
                        ip6_del_rt(rt, NULL, NULL, NULL);