netns xfrm: per-netns xfrm_policy_byidx hash
[safe/jmp/linux-2.6] / net / xfrm / xfrm_policy.c
index f072598..700cdd7 100644 (file)
@@ -46,7 +46,6 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-static struct list_head xfrm_policy_all;
 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 EXPORT_SYMBOL(xfrm_policy_count);
 
@@ -55,7 +54,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
 
-static struct work_struct xfrm_policy_gc_work;
 static HLIST_HEAD(xfrm_policy_gc_list);
 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
@@ -229,13 +227,14 @@ expired:
  * SPD calls.
  */
 
-struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
+struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 {
        struct xfrm_policy *policy;
 
        policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
        if (policy) {
+               write_pnet(&policy->xp_net, net);
                INIT_LIST_HEAD(&policy->walk.all);
                INIT_HLIST_NODE(&policy->bydst);
                INIT_HLIST_NODE(&policy->byidx);
@@ -296,6 +295,7 @@ static void xfrm_policy_gc_task(struct work_struct *work)
        hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
                xfrm_policy_gc_kill(policy);
 }
+static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
 
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
@@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
                return;
        }
 
-       spin_lock(&xfrm_policy_gc_lock);
+       spin_lock_bh(&xfrm_policy_gc_lock);
        hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
-       spin_unlock(&xfrm_policy_gc_lock);
+       spin_unlock_bh(&xfrm_policy_gc_lock);
 
        schedule_work(&xfrm_policy_gc_work);
 }
@@ -329,7 +329,6 @@ struct xfrm_policy_hash {
 
 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
-static struct hlist_head *xfrm_policy_byidx __read_mostly;
 static unsigned int xfrm_idx_hmask __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
@@ -438,7 +437,7 @@ static void xfrm_byidx_resize(int total)
        unsigned int hmask = xfrm_idx_hmask;
        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-       struct hlist_head *oidx = xfrm_policy_byidx;
+       struct hlist_head *oidx = init_net.xfrm.policy_byidx;
        struct hlist_head *nidx = xfrm_hash_alloc(nsize);
        int i;
 
@@ -450,7 +449,7 @@ static void xfrm_byidx_resize(int total)
        for (i = hmask; i >= 0; i--)
                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 
-       xfrm_policy_byidx = nidx;
+       init_net.xfrm.policy_byidx = nidx;
        xfrm_idx_hmask = nhashmask;
 
        write_unlock_bh(&xfrm_policy_lock);
@@ -521,7 +520,7 @@ static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
 
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
-static u32 xfrm_gen_index(u8 type, int dir)
+static u32 xfrm_gen_index(int dir)
 {
        static u32 idx_generator;
 
@@ -536,7 +535,7 @@ static u32 xfrm_gen_index(u8 type, int dir)
                idx_generator += 8;
                if (idx == 0)
                        idx = 8;
-               list = xfrm_policy_byidx + idx_hash(idx);
+               list = init_net.xfrm.policy_byidx + idx_hash(idx);
                found = 0;
                hlist_for_each_entry(p, entry, list, byidx) {
                        if (p->index == idx) {
@@ -608,13 +607,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
                list_del(&delpol->walk.all);
                xfrm_policy_count[dir]--;
        }
-       policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
-       hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
+       policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+       hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index));
        policy->curlft.add_time = get_seconds();
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
-       list_add(&policy->walk.all, &xfrm_policy_all);
+       list_add(&policy->walk.all, &init_net.xfrm.policy_all);
        write_unlock_bh(&xfrm_policy_lock);
 
        if (delpol)
@@ -711,7 +710,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 
        *err = 0;
        write_lock_bh(&xfrm_policy_lock);
-       chain = xfrm_policy_byidx + idx_hash(id);
+       chain = init_net.xfrm.policy_byidx + idx_hash(id);
        ret = NULL;
        hlist_for_each_entry(pol, entry, chain, byidx) {
                if (pol->type == type && pol->index == id) {
@@ -880,10 +879,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 
        write_lock_bh(&xfrm_policy_lock);
        if (list_empty(&walk->walk.all))
-               x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all);
+               x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all);
        else
                x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
-       list_for_each_entry_from(x, &xfrm_policy_all, all) {
+       list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) {
                if (x->dead)
                        continue;
                pol = container_of(x, struct xfrm_policy, walk);
@@ -1085,9 +1084,9 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
        struct hlist_head *chain = policy_hash_bysel(&pol->selector,
                                                     pol->family, dir);
 
-       list_add(&pol->walk.all, &xfrm_policy_all);
+       list_add(&pol->walk.all, &init_net.xfrm.policy_all);
        hlist_add_head(&pol->bydst, chain);
-       hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+       hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index));
        xfrm_policy_count[dir]++;
        xfrm_pol_hold(pol);
 
@@ -1138,7 +1137,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
        sk->sk_policy[dir] = pol;
        if (pol) {
                pol->curlft.add_time = get_seconds();
-               pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
+               pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
        }
        if (old_pol)
@@ -1153,7 +1152,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 
 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 {
-       struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+       struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
 
        if (newp) {
                newp->selector = old->selector;
@@ -1691,11 +1690,11 @@ restart:
                        if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
                                DECLARE_WAITQUEUE(wait, current);
 
-                               add_wait_queue(&km_waitq, &wait);
+                               add_wait_queue(&init_net.xfrm.km_waitq, &wait);
                                set_current_state(TASK_INTERRUPTIBLE);
                                schedule();
                                set_current_state(TASK_RUNNING);
-                               remove_wait_queue(&km_waitq, &wait);
+                               remove_wait_queue(&init_net.xfrm.km_waitq, &wait);
 
                                nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
@@ -2394,12 +2393,13 @@ static int __init xfrm_statistics_init(void)
 }
 #endif
 
-static void __init xfrm_policy_init(void)
+static int __net_init xfrm_policy_init(struct net *net)
 {
        unsigned int hmask, sz;
        int dir;
 
-       xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
+       if (net_eq(net, &init_net))
+               xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
                                           sizeof(struct xfrm_dst),
                                           0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                           NULL);
@@ -2407,10 +2407,10 @@ static void __init xfrm_policy_init(void)
        hmask = 8 - 1;
        sz = (hmask+1) * sizeof(struct hlist_head);
 
-       xfrm_policy_byidx = xfrm_hash_alloc(sz);
+       net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
+       if (!net->xfrm.policy_byidx)
+               goto out_byidx;
        xfrm_idx_hmask = hmask;
-       if (!xfrm_policy_byidx)
-               panic("XFRM: failed to allocate byidx hash\n");
 
        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
                struct xfrm_policy_hash *htab;
@@ -2424,18 +2424,61 @@ static void __init xfrm_policy_init(void)
                        panic("XFRM: failed to allocate bydst hash\n");
        }
 
-       INIT_LIST_HEAD(&xfrm_policy_all);
-       INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
-       register_netdevice_notifier(&xfrm_dev_notifier);
+       INIT_LIST_HEAD(&net->xfrm.policy_all);
+       if (net_eq(net, &init_net))
+               register_netdevice_notifier(&xfrm_dev_notifier);
+       return 0;
+
+out_byidx:
+       return -ENOMEM;
+}
+
+/* Per-netns policy teardown: warn about leftovers, then free the byidx
+ * table. NOTE(review): the size uses the file-global xfrm_idx_hmask. */
+static void xfrm_policy_fini(struct net *net)
+{
+       struct hlist_head *byidx = net->xfrm.policy_byidx;
+
+       WARN_ON(!list_empty(&net->xfrm.policy_all));
+       WARN_ON(!hlist_empty(byidx));   /* passes the first bucket only */
+       xfrm_hash_free(byidx, (xfrm_idx_hmask + 1) * sizeof(struct hlist_head));
+}
+
+/* Per-netns setup: xfrm state first, then xfrm policy; the state side is
+ * torn down again if policy setup fails. Returns 0, or the negative value
+ * returned by the step that failed. */
+static int __net_init xfrm_net_init(struct net *net)
+{
+       int err;
+
+       err = xfrm_state_init(net);
+       if (err < 0)
+               return err;
+       err = xfrm_policy_init(net);
+       if (err < 0) {
+               xfrm_state_fini(net);
+               return err;
+       }
+       return 0;
 }
 
+static void __net_exit xfrm_net_exit(struct net *net)
+{
+       xfrm_policy_fini(net);  /* reverse of the xfrm_net_init() order */
+       xfrm_state_fini(net);
+}
+
+static struct pernet_operations __net_initdata xfrm_net_ops = {
+       .init = xfrm_net_init,  /* state + policy setup for one netns */
+       .exit = xfrm_net_exit,  /* matching teardown */
+};
+
 void __init xfrm_init(void)
 {
+       register_pernet_subsys(&xfrm_net_ops);
 #ifdef CONFIG_XFRM_STATISTICS
        xfrm_statistics_init();
 #endif
-       xfrm_state_init();
-       xfrm_policy_init();
        xfrm_input_init();
 #ifdef CONFIG_XFRM_STATISTICS
        xfrm_proc_init();
@@ -2455,13 +2498,11 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 
        switch(sel->family) {
        case AF_INET:
-               audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
-                                NIPQUAD(sel->saddr.a4));
+               audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
                if (sel->prefixlen_s != 32)
                        audit_log_format(audit_buf, " src_prefixlen=%d",
                                         sel->prefixlen_s);
-               audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
-                                NIPQUAD(sel->daddr.a4));
+               audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
                if (sel->prefixlen_d != 32)
                        audit_log_format(audit_buf, " dst_prefixlen=%d",
                                         sel->prefixlen_d);