ipv4: pass current value of rt_genid into rt_hash
[safe/jmp/linux-2.6] / net / ipv4 / route.c
index 92f90ae..e4e37ed 100644 (file)
@@ -5,8 +5,6 @@
  *
  *             ROUTE - implementation of the IP router.
  *
- * Version:    $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
  * Authors:    Ross Biro
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *             Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly      = 10 * 60 * HZ;
 
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
 
 /*
  *     Interface to generic destination cache.
@@ -160,7 +157,7 @@ static struct dst_ops ipv4_dst_ops = {
        .negative_advice =      ipv4_negative_advice,
        .link_failure =         ipv4_link_failure,
        .update_pmtu =          ip_rt_update_pmtu,
-       .local_out =            ip_local_out,
+       .local_out =            __ip_local_out,
        .entry_size =           sizeof(struct rtable),
        .entries =              ATOMIC_INIT(0),
 };
@@ -259,11 +256,12 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
        (__raw_get_cpu_var(rt_cache_stat).field++)
 
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+               int genid)
 {
        return jhash_3words((__force u32)(__be32)(daddr),
                            (__force u32)(__be32)(saddr),
-                           idx, atomic_read(&rt_genid))
+                           idx, genid)
                & rt_hash_mask;
 }
 
@@ -781,7 +779,7 @@ static void rt_worker_func(struct work_struct *work)
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
  */
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
 {
        unsigned char shuffle;
 
@@ -793,9 +791,9 @@ static void rt_cache_invalidate(void)
  * delay < 0  : invalidate cache (fast : entries will be deleted later)
  * delay >= 0 : invalidate & flush cache (can be long)
  */
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
 {
-       rt_cache_invalidate();
+       rt_cache_invalidate(net);
        if (delay >= 0)
                rt_do_flush(!in_softirq());
 }
@@ -803,10 +801,11 @@ void rt_cache_flush(int delay)
 /*
  * We change rt_genid and let gc do the cleanup
  */
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
 {
-       rt_cache_invalidate();
-       mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+       struct net *net = (struct net *)__net;
+       rt_cache_invalidate(net);
+       mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
@@ -1182,7 +1181,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
        for (i = 0; i < 2; i++) {
                for (k = 0; k < 2; k++) {
-                       unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+                       unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+                                               atomic_read(&rt_genid));
 
                        rthp=&rt_hash_table[hash].chain;
 
@@ -1297,7 +1297,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
                } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
                           rt->u.dst.expires) {
                        unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-                                               rt->fl.oif);
+                                               rt->fl.oif,
+                                               atomic_read(&rt_genid));
 #if RT_CACHE_DEBUG >= 1
                        printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
                                          NIPQUAD_FMT "/%02x dropped\n",
@@ -1446,7 +1447,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
        for (k = 0; k < 2; k++) {
                for (i = 0; i < 2; i++) {
-                       unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+                       unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+                                               atomic_read(&rt_genid));
 
                        rcu_read_lock();
                        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1711,7 +1713,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        RT_CACHE_STAT_INC(in_slow_mc);
 
        in_dev_put(in_dev);
-       hash = rt_hash(daddr, saddr, dev->ifindex);
+       hash = rt_hash(daddr, saddr, dev->ifindex, atomic_read(&rt_genid));
        return rt_intern_hash(hash, rth, &skb->rtable);
 
 e_nobufs:
@@ -1792,7 +1794,7 @@ static int __mkroute_input(struct sk_buff *skb,
        if (err)
                flags |= RTCF_DIRECTSRC;
 
-       if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
+       if (out_dev == in_dev && err &&
            (IN_DEV_SHARED_MEDIA(out_dev) ||
             inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
                flags |= RTCF_DOREDIRECT;
@@ -1872,7 +1874,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
                return err;
 
        /* put it into the cache */
-       hash = rt_hash(daddr, saddr, fl->iif);
+       hash = rt_hash(daddr, saddr, fl->iif, atomic_read(&rt_genid));
        return rt_intern_hash(hash, rth, &skb->rtable);
 }
 
@@ -2028,7 +2030,7 @@ local_input:
                rth->rt_flags   &= ~RTCF_LOCAL;
        }
        rth->rt_type    = res.type;
-       hash = rt_hash(daddr, saddr, fl.iif);
+       hash = rt_hash(daddr, saddr, fl.iif, atomic_read(&rt_genid));
        err = rt_intern_hash(hash, rth, &skb->rtable);
        goto done;
 
@@ -2079,7 +2081,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
        net = dev_net(dev);
        tos &= IPTOS_RT_MASK;
-       hash = rt_hash(daddr, saddr, iif);
+       hash = rt_hash(daddr, saddr, iif, atomic_read(&rt_genid));
 
        rcu_read_lock();
        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2268,7 +2270,8 @@ static int ip_mkroute_output(struct rtable **rp,
        int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
        unsigned hash;
        if (err == 0) {
-               hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+               hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+                              atomic_read(&rt_genid));
                err = rt_intern_hash(hash, rth, rp);
        }
 
@@ -2480,7 +2483,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
        unsigned hash;
        struct rtable *rth;
 
-       hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+       hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif,
+                      atomic_read(&rt_genid));
 
        rcu_read_lock_bh();
        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2827,19 +2831,27 @@ done:
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-       rt_cache_flush(0);
+       rt_cache_flush(dev_net(in_dev->dev), 0);
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
-
 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
                                        struct file *filp, void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
        if (write) {
+               int flush_delay;
+               struct net *net;
+               static DEFINE_MUTEX(flush_mutex);
+
+               mutex_lock(&flush_mutex);
+               ctl->data = &flush_delay;
                proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-               rt_cache_flush(flush_delay);
+               ctl->data = NULL;
+               mutex_unlock(&flush_mutex);
+
+               net = (struct net *)ctl->extra1;
+               rt_cache_flush(net, flush_delay);
                return 0;
        }
 
@@ -2855,25 +2867,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
                                                size_t newlen)
 {
        int delay;
+       struct net *net;
        if (newlen != sizeof(int))
                return -EINVAL;
        if (get_user(delay, (int __user *)newval))
                return -EFAULT;
-       rt_cache_flush(delay);
+       net = (struct net *)table->extra1;
+       rt_cache_flush(net, delay);
        return 0;
 }
 
 ctl_table ipv4_route_table[] = {
        {
-               .ctl_name       = NET_IPV4_ROUTE_FLUSH,
-               .procname       = "flush",
-               .data           = &flush_delay,
-               .maxlen         = sizeof(int),
-               .mode           = 0200,
-               .proc_handler   = &ipv4_sysctl_rtcache_flush,
-               .strategy       = &ipv4_sysctl_rtcache_flush_strategy,
-       },
-       {
                .ctl_name       = NET_IPV4_ROUTE_GC_THRESH,
                .procname       = "gc_thresh",
                .data           = &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3016,93 @@ ctl_table ipv4_route_table[] = {
        },
        { .ctl_name = 0 }
 };
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {	/* sysctl path net / ipv4 / route, used when registering the per-net flush table */
+       { .procname = "net", .ctl_name = CTL_NET, },
+       { .procname = "ipv4", .ctl_name = NET_IPV4, },
+       { .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+       { },	/* empty sentinel entry closing the path */
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {	/* "flush" sysctl: used as-is for init_net, duplicated for other namespaces */
+       {	/* no .data here: the proc handler points it at a local int while parsing; extra1 is set to the owning net at registration */
+               .ctl_name       = NET_IPV4_ROUTE_FLUSH,
+               .procname       = "flush",
+               .maxlen         = sizeof(int),
+               .mode           = 0200,	/* write permission for the owner only */
+               .proc_handler   = &ipv4_sysctl_rtcache_flush,
+               .strategy       = &ipv4_sysctl_rtcache_flush_strategy,
+       },
+       { .ctl_name = 0 },	/* zero ctl_name sentinel entry */
+};
+
+/*
+ * Per-namespace setup of the "flush" sysctl under net/ipv4/route.
+ *
+ * init_net registers the static ipv4_route_flush_table directly; every
+ * other namespace registers its own kmemdup()ed copy.  The owning
+ * namespace is stored in extra1 of the first entry and the resulting
+ * header is kept in net->ipv4.route_hdr.
+ *
+ * Returns 0 on success, -ENOMEM if the copy or the registration fails.
+ */
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+       struct ctl_table *table = ipv4_route_flush_table;
+
+       if (net != &init_net) {
+               table = kmemdup(ipv4_route_flush_table,
+                               sizeof(ipv4_route_flush_table), GFP_KERNEL);
+               if (!table)
+                       return -ENOMEM;
+       }
+
+       /* The flush handlers read the namespace back from extra1. */
+       table[0].extra1 = net;
+
+       net->ipv4.route_hdr =
+               register_net_sysctl_table(net, ipv4_route_path, table);
+       if (net->ipv4.route_hdr)
+               return 0;
+
+       /* Registration failed: drop the private copy, if one was made. */
+       if (table != ipv4_route_flush_table)
+               kfree(table);
+       return -ENOMEM;
+}
+
+/*
+ * Per-namespace teardown of the "flush" sysctl: unregister the header
+ * stored in net->ipv4.route_hdr and free the table behind it.  Reaching
+ * this point with the static ipv4_route_flush_table is treated as a bug.
+ */
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+       /* Fetch the table pointer before the header is unregistered. */
+       struct ctl_table *table = net->ipv4.route_hdr->ctl_table_arg;
+
+       unregister_net_sysctl_table(net->ipv4.route_hdr);
+       BUG_ON(table == ipv4_route_flush_table);
+       kfree(table);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {	/* per-namespace hooks for the route "flush" sysctl */
+       .init = sysctl_route_net_init,	/* registers the flush table for the namespace */
+       .exit = sysctl_route_net_exit,	/* unregisters it and frees the table */
+};
 #endif
 
+
+/*
+ * Per-namespace start of the rt_secret timer.  When it fires,
+ * rt_secret_rebuild() invalidates the route cache for @net and re-arms
+ * the timer.
+ *
+ * The first expiry is one ip_rt_secret_interval from now plus a random
+ * offset below one interval.  Always returns 0.
+ */
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+       struct timer_list *timer = &net->ipv4.rt_secret_timer;
+       /* Read once: ip_rt_secret_interval is a plain, writable int. */
+       int interval = ip_rt_secret_interval;
+       unsigned long expires = jiffies + interval;
+
+       /*
+        * Initialise first, then fill in the callback, so the setup does
+        * not rely on init_timer_deferrable() leaving those fields alone.
+        */
+       init_timer_deferrable(timer);
+       timer->function = rt_secret_rebuild;
+       timer->data = (unsigned long)net;
+
+       /*
+        * Timers started together tend to synchronize, so perturb the
+        * first expiry.  A zero interval must skip the modulo, which
+        * would otherwise be a division by zero.
+        */
+       if (interval)
+               expires += net_random() % interval;
+
+       timer->expires = expires;
+       add_timer(timer);
+       return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)	/* per-namespace teardown of the rt_secret timer */
+{
+       del_timer_sync(&net->ipv4.rt_secret_timer);	/* stop the timer that rt_secret_timer_init() armed for this namespace */
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {	/* per-namespace hooks for the rt_secret timer; registered from ip_rt_init() */
+       .init = rt_secret_timer_init,	/* arms the namespace's timer */
+       .exit = rt_secret_timer_exit,	/* deletes it again */
+};
+
+
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
@@ -3065,19 +3155,14 @@ int __init ip_rt_init(void)
        devinet_init();
        ip_fib_init();
 
-       rt_secret_timer.function = rt_secret_rebuild;
-       rt_secret_timer.data = 0;
-       init_timer_deferrable(&rt_secret_timer);
-
        /* All the timers, started at system startup tend
           to synchronize. Perturb it a bit.
         */
        schedule_delayed_work(&expires_work,
                net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-       rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
-               ip_rt_secret_interval;
-       add_timer(&rt_secret_timer);
+       if (register_pernet_subsys(&rt_secret_timer_ops))
+               printk(KERN_ERR "Unable to setup rt_secret_timer\n");
 
        if (ip_rt_proc_init())
                printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3172,9 @@ int __init ip_rt_init(void)
 #endif
        rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
 
+#ifdef CONFIG_SYSCTL
+       register_pernet_subsys(&sysctl_route_ops);
+#endif
        return rc;
 }