X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fxfrm%2Fxfrm_policy.c;h=843e066649cb36834dd186084ff378899c967d31;hb=4dc6ec00f6347b72312fa41dfc587d5302b05544;hp=3d931f52f897f4ae46d879674ef0a3273cad5736;hpb=98806f75ba2afc716e4d2f915d3ac7687546f9c0;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3d931f5..843e066 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,13 +34,6 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly = 1; - -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -58,6 +51,9 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); + static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) { @@ -92,7 +88,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -static inline struct dst_entry *__xfrm_dst_lookup(int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr, int family) @@ -104,7 +100,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); @@ -116,6 +112,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; @@ -129,7 +126,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -321,27 +318,27 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int idx_hash(u32 index) +static inline unsigned int idx_hash(struct net *net, u32 index) { - return __idx_hash(index, init_net.xfrm.policy_idx_hmask); + return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) { - unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &init_net.xfrm.policy_inexact[dir] : - init_net.xfrm.policy_bydst[dir].table + hash); + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) { - unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return init_net.xfrm.policy_bydst[dir].table + hash; + return net->xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, @@ -472,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; - si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; - si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = init_net.xfrm.policy_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } @@ -508,7 +505,7 @@ static void xfrm_hash_resize(struct work_struct *work) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(struct net *net, int dir) { static u32 idx_generator; @@ -523,7 +520,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - list = init_net.xfrm.policy_byidx + idx_hash(idx); + list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, entry, list, byidx) { if (p->index == idx) { @@ -553,19 +550,22 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { + struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; struct hlist_node *entry, *newpos; struct dst_entry *gc_list; + u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && + (mark & pol->mark.m) == pol->mark.v && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -587,27 +587,23 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - init_net.xfrm.policy_count[dir]++; + net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); - if (delpol) { - hlist_del(&delpol->bydst); - hlist_del(&delpol->byidx); - list_del(&delpol->walk.all); - init_net.xfrm.policy_count[dir]--; - } - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); - hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index)); + if (delpol) + __xfrm_policy_unlink(delpol, dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add(&policy->walk.all, &init_net.xfrm.policy_all); + list_add(&policy->walk.all, &net->xfrm.policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(&init_net, dir, NULL)) - schedule_work(&init_net.xfrm.policy_hash_work); + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; @@ -641,8 +637,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -652,10 +648,11 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); @@ -666,10 +663,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); - init_net.xfrm.policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -685,8 +679,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -698,10 +692,11 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = init_net.xfrm.policy_byidx + idx_hash(id); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( @@ -710,10 +705,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); - init_net.xfrm.policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -731,7 +723,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; @@ -741,7 +733,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &init_net.xfrm.policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); @@ -753,9 +745,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) return err; } } - for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - init_net.xfrm.policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -775,74 +767,74 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0; + int dir, err = 0, cnt = 0; + struct xfrm_policy *dp; write_lock_bh(&xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) goto out; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; - int i, killed; + int i; - killed = 0; again1: hlist_for_each_entry(pol, entry, - &init_net.xfrm.policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again1; } - for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - init_net.xfrm.policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again2; } } - init_net.xfrm.policy_count[dir] -= killed; } + if (!cnt) + err = -ESRCH; atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); @@ -850,7 +842,7 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(struct xfrm_policy_walk *walk, +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { @@ -867,10 +859,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, write_lock_bh(&xfrm_policy_lock); if (list_empty(&walk->walk.all)) - x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); - list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) { + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); @@ -928,6 +920,7 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, int match, ret = -ESRCH; if (pol->family != family || + (fl->mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; @@ -939,7 +932,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -955,7 +949,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -972,7 +966,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -995,14 +989,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1010,7 +1004,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1051,6 +1045,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) { + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } err = security_xfrm_policy_lookup(pol->security, fl->secid, policy_to_flow_dir(dir)); @@ -1063,6 +1061,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc } else pol = NULL; } +out: read_unlock_bh(&xfrm_policy_lock); return pol; } @@ -1070,12 +1069,12 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - struct hlist_head *chain = policy_hash_bysel(&pol->selector, + struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, pol->family, dir); list_add(&pol->walk.all, &net->xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(pol->index)); + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); @@ -1116,6 +1115,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1128,7 +1128,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -1154,6 +1154,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; @@ -1183,7 +1184,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1191,7 +1192,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1203,6 +1204,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1221,7 +1223,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; @@ -1325,15 +1327,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1357,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1366,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1382,6 +1398,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1405,7 +1422,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -1461,7 +1478,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbout for reachability confirmation */ + /* Copy neighbour for reachability confirmation */ dst0->neighbour = neigh_clone(dst->neighbour); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); @@ -1470,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; @@ -1535,7 +1552,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1565,7 +1582,7 @@ restart: policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1573,14 +1590,14 @@ restart: if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !init_net.xfrm.policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1603,7 +1620,7 @@ restart: default: case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; @@ -1623,7 +1640,7 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = PTR_ERR(dst); goto error; } @@ -1633,17 +1650,18 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); err = PTR_ERR(pols[1]); goto error; } if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } @@ -1670,27 +1688,27 @@ restart: if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); xfrm_pol_put(policy); return -EREMOTE; } if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&init_net.xfrm.km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&init_net.xfrm.km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -ERESTART; goto error; } @@ -1702,7 +1720,7 @@ restart: err = nx; } if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } } @@ -1715,7 +1733,7 @@ restart: dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); err = PTR_ERR(dst); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); goto error; } @@ -1736,9 +1754,9 @@ restart: dst_free(dst); if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = -EHOSTUNREACH; goto error; } @@ -1750,7 +1768,7 @@ restart: if (unlikely(err)) { write_unlock_bh(&policy->lock); dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); goto error; } @@ -1779,10 +1797,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1890,6 +1908,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1905,7 +1924,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1918,7 +1937,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1928,24 +1947,24 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1957,12 +1976,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); @@ -1986,11 +2005,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -2014,20 +2033,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -2039,15 +2058,21 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; + struct dst_entry *dst; + int res; if (xfrm_decode_session(skb, &fl, family) < 0) { - /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + dst = skb_dst(skb); + + res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; + skb_dst_set(skb, dst); + return res; } EXPORT_SYMBOL(__xfrm_route_forward); @@ -2131,7 +2156,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent write_unlock(&pol->lock); } -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) +static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) { struct dst_entry *gc_list = NULL; int dir; @@ -2144,11 +2169,11 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) int i; hlist_for_each_entry(pol, entry, - &init_net.xfrm.policy_inexact[dir], bydst) + &net->xfrm.policy_inexact[dir], bydst) prune_one_bundle(pol, func, &gc_list); - table = init_net.xfrm.policy_bydst[dir].table; - for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + table = net->xfrm.policy_bydst[dir].table; + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) prune_one_bundle(pol, func, &gc_list); } @@ -2167,14 +2192,14 @@ static int unused_bundle(struct dst_entry *dst) return !atomic_read(&dst->__refcnt); } -static void __xfrm_garbage_collect(void) +static void __xfrm_garbage_collect(struct net *net) { - xfrm_prune_bundles(unused_bundle); + xfrm_prune_bundles(net, unused_bundle); } -static int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(struct net *net) { - xfrm_prune_bundles(stale_bundle); + xfrm_prune_bundles(net, stale_bundle); return 0; } @@ -2286,6 +2311,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2309,6 +2335,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2339,6 +2386,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2360,12 +2423,9 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + xfrm_flush_bundles(dev_net(dev)); } return NOTIFY_DONE; } @@ -2375,13 +2435,33 @@ static struct notifier_block xfrm_dev_notifier = { }; #ifdef CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, + int rv; + + if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif static int __net_init xfrm_policy_init(struct net *net) @@ -2436,9 +2516,23 @@ out_byidx: static void xfrm_policy_fini(struct net *net) { + struct xfrm_audit audit_info; unsigned int sz; int dir; + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); +#endif + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + flush_work(&xfrm_policy_gc_work); + WARN_ON(!list_empty(&net->xfrm.policy_all)); for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { @@ -2461,24 +2555,37 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; rv = xfrm_state_init(net); if (rv < 0) goto out_state; rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + xfrm_dst_ops_init(net); + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; return 0; +out_sysctl: + xfrm_policy_fini(net); out_policy: xfrm_state_fini(net); out_state: + xfrm_statistics_fini(net); +out_statistics: return rv; } static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); + xfrm_statistics_fini(net); } static struct pernet_operations __net_initdata xfrm_net_ops = { @@ -2489,13 +2596,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); -#endif xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL @@ -2595,7 +2696,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) {