X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fxfrm%2Fxfrm_policy.c;h=f4ea3a08e5a1369cf930110b28dfb85bab45fb1b;hb=a1aa3483041bd3691c7f029272ccef4ce70bd957;hp=9fc4c315f6cd9702a223b5758d3f61e26c89808b;hpb=b791160b5af4ea95c72fb59d13079664beca1963;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9fc4c31..f4ea3a0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,33 +34,25 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; - -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); +static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); +static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); - static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct work_struct xfrm_policy_gc_work; -static HLIST_HEAD(xfrm_policy_gc_list); -static DEFINE_SPINLOCK(xfrm_policy_gc_lock); - static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); +static int stale_bundle(struct dst_entry *dst); + +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) @@ -96,25 +88,53 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, + xfrm_address_t *daddr, + int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct dst_entry *dst; + + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EAFNOSUPPORT); + + dst = afinfo->dst_lookup(net, tos, saddr, daddr); + + xfrm_policy_put_afinfo(afinfo); + + return dst; +} + static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, + xfrm_address_t *prev_saddr, + xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; - struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; - if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) + if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { saddr = x->coaddr; - if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = prev_daddr; + } + if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { + saddr = prev_saddr; daddr = x->coaddr; + } - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EAFNOSUPPORT); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); + + if (!IS_ERR(dst)) { + if (prev_saddr != saddr) + memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); + if (prev_daddr != daddr) + memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); + } - dst = afinfo->dst_lookup(tos, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); return dst; } @@ -136,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -196,24 +216,56 @@ expired: xfrm_pol_put(xp); } +static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + if (unlikely(pol->walk.dead)) + flo = NULL; + else + xfrm_pol_hold(pol); + + return flo; +} + +static int xfrm_policy_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + + return !pol->walk.dead; +} + +static void xfrm_policy_flo_delete(struct flow_cache_object *flo) +{ + xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); +} + +static const struct flow_cache_ops xfrm_policy_fc_ops = { + .get = xfrm_policy_flo_get, + .check = xfrm_policy_flo_check, + .delete = xfrm_policy_flo_delete, +}; /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + write_pnet(&policy->xp_net, net); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -223,108 +275,55 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); - - BUG_ON(policy->bundles); + BUG_ON(!policy->walk.dead); if (del_timer(&policy->timer)) BUG(); - security_xfrm_policy_free(policy); + security_xfrm_policy_free(policy->security); kfree(policy); } EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_policy_gc_kill(struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - while ((dst = policy->bundles) != NULL) { - policy->bundles = dst->next; - dst_free(dst); - } - - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); - - if (atomic_read(&policy->refcnt) > 1) - flow_cache_flush(); - - xfrm_pol_put(policy); -} - -static void xfrm_policy_gc_task(struct work_struct *work) -{ - struct xfrm_policy *policy; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; - - spin_lock_bh(&xfrm_policy_gc_lock); - gc_list.first = xfrm_policy_gc_list.first; - INIT_HLIST_HEAD(&xfrm_policy_gc_list); - spin_unlock_bh(&xfrm_policy_gc_lock); - - hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) - xfrm_policy_gc_kill(policy); -} - /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ static void xfrm_policy_kill(struct xfrm_policy *policy) { - int dead; + policy->walk.dead = 1; - write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; - write_unlock_bh(&policy->lock); + atomic_inc(&policy->genid); - if (unlikely(dead)) { - WARN_ON(1); - return; - } - - spin_lock(&xfrm_policy_gc_lock); - hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + if (del_timer(&policy->timer)) + xfrm_pol_put(policy); - schedule_work(&xfrm_policy_gc_work); + xfrm_pol_put(policy); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int idx_hash(u32 index) +static inline unsigned int idx_hash(struct net *net, u32 index) { - return __idx_hash(index, xfrm_idx_hmask); + return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &xfrm_policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return xfrm_policy_bydst[dir].table + hash; + return net->xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, @@ -379,12 +378,12 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) return ((old_hmask + 1) << 1) - 1; } -static void xfrm_bydst_resize(int dir) +static void xfrm_bydst_resize(struct net *net, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; @@ -396,20 +395,20 @@ static void xfrm_bydst_resize(int dir) for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(int total) +static void xfrm_byidx_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; @@ -421,18 +420,18 @@ static void xfrm_byidx_resize(int total) for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - xfrm_policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } -static inline int xfrm_bydst_should_resize(int dir, int *total) +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { - unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -444,9 +443,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) return 0; } -static inline int xfrm_byidx_should_resize(int total) +static inline int xfrm_byidx_should_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -455,44 +454,43 @@ static inline int xfrm_byidx_should_resize(int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; - si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; - si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = xfrm_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(u8 type, int dir) +static u32 xfrm_gen_index(struct net *net, int dir) { static u32 idx_generator; @@ -507,7 +505,7 @@ static u32 xfrm_gen_index(u8 type, int dir) idx_generator += 8; if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); + list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, entry, list, byidx) { if (p->index == idx) { @@ -537,19 +535,21 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { + struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; struct hlist_node *entry, *newpos; - struct dst_entry *gc_list; + u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && + (mark & pol->mark.m) == pol->mark.v && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -571,60 +571,30 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; + net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); - if (delpol) { - hlist_del(&delpol->bydst); - hlist_del(&delpol->byidx); - xfrm_policy_count[dir]--; - } - policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + if (delpol) + __xfrm_policy_unlink(delpol, dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); + list_add(&policy->walk.all, &net->xfrm.policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); - - read_lock_bh(&xfrm_policy_lock); - gc_list = NULL; - entry = &policy->bydst; - hlist_for_each_entry_continue(policy, entry, bydst) { - struct dst_entry *dst; - - write_lock(&policy->lock); - dst = policy->bundles; - if (dst) { - struct dst_entry *tail = dst; - while (tail->next) - tail = tail->next; - tail->next = gc_list; - gc_list = dst; - - policy->bundles = NULL; - } - write_unlock(&policy->lock); - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - - gc_list = dst->next; - dst_free(dst); - } + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); return 0; } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -634,22 +604,22 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -657,16 +627,14 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -678,20 +646,20 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -699,17 +667,15 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { - atomic_inc(&flow_cache_genid); + if (ret && delete) xfrm_policy_kill(ret); - } return ret; } EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; @@ -719,27 +685,30 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); return err; } } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete( + pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); return err; } @@ -750,133 +719,143 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0; + int dir, err = 0, cnt = 0; write_lock_bh(&xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) goto out; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; - int i, killed; + int i; - killed = 0; again1: hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again1; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again2; } } - xfrm_policy_count[dir] -= killed; } - atomic_inc(&flow_cache_genid); + if (!cnt) + err = -ESRCH; out: write_unlock_bh(&xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol, *last = NULL; - struct hlist_node *entry; - int dir, last_dir = 0, count, error; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; + int error = 0; - read_lock_bh(&xfrm_policy_lock); - count = 0; + if (walk->type >= XFRM_POLICY_TYPE_MAX && + walk->type != XFRM_POLICY_TYPE_ANY) + return -EINVAL; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; + if (list_empty(&walk->walk.all) && walk->seq != 0) + return 0; - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; - } - last = pol; - last_dir = dir; - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; - } - last = pol; - last_dir = dir; - count++; - } + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { + if (x->dead) + continue; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } + walk->seq++; } - if (count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -889,17 +868,20 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, int match, ret = -ESRCH; if (pol->family != family || + (fl->mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol, fl->secid, dir); + ret = security_xfrm_policy_lookup(pol->security, fl->secid, + dir); return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -915,7 +897,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -932,7 +914,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -955,32 +937,37 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy * +__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) { +#ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; - int err = 0; -#ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } - if (pol || err) - goto end; + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol != NULL) + return pol; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); - pol = NULL; - } -#ifdef CONFIG_XFRM_SUB_POLICY -end: -#endif - if ((*objp = (void *) pol) != NULL) - *obj_refp = &pol->refcnt; - return err; + return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +} + +static struct flow_cache_object * +xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, struct flow_cache_object *old_obj, void *ctx) +{ + struct xfrm_policy *pol; + + if (old_obj) + xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); + + pol = __xfrm_policy_lookup(net, fl, family, dir); + if (IS_ERR_OR_NULL(pol)) + return ERR_CAST(pol); + + /* Resolver returns two references: + * one for cache and one for caller of flow_cache_lookup() */ + xfrm_pol_hold(pol); + + return &pol->flo; } static inline int policy_to_flow_dir(int dir) @@ -1011,8 +998,13 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) { - err = security_xfrm_policy_lookup(pol, fl->secid, - policy_to_flow_dir(dir)); + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } + err = security_xfrm_policy_lookup(pol->security, + fl->secid, + policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); else if (err == -ESRCH) @@ -1022,33 +1014,39 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc } else pol = NULL; } +out: read_unlock_bh(&xfrm_policy_lock); return pol; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct hlist_head *chain = policy_hash_bysel(&pol->selector, + struct net *net = xp_net(pol); + struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, pol->family, dir); + list_add(&pol->walk.all, &net->xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); + net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { + struct net *net = xp_net(pol); + if (hlist_unhashed(&pol->bydst)) return NULL; hlist_del(&pol->bydst); hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + list_del(&pol->walk.all); + net->xfrm.policy_count[dir]--; return pol; } @@ -1059,8 +1057,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) pol = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); if (pol) { - if (dir < XFRM_POLICY_MAX) - atomic_inc(&flow_cache_genid); xfrm_policy_kill(pol); return 0; } @@ -1070,6 +1066,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1082,10 +1079,13 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) + /* Unlinking succeeds always. This is the only function + * allowed to delete or replace socket policy. + */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); write_unlock_bh(&xfrm_policy_lock); @@ -1097,16 +1097,18 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); if (newp) { newp->selector = old->selector; - if (security_xfrm_policy_clone(old, newp)) { + if (security_xfrm_policy_clone(old->security, + &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; @@ -1136,7 +1138,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1144,7 +1146,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1156,6 +1158,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1174,7 +1177,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; @@ -1194,6 +1197,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, -EINVAL : -EAGAIN); xfrm_state_put(x); } + else if (error == -ESRCH) + error = -EAGAIN; if (!tmpl->optional) goto fail; @@ -1249,18 +1254,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, * still valid. */ -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) -{ - struct dst_entry *x; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EINVAL); - x = afinfo->find_bundle(fl, policy); - xfrm_policy_put_afinfo(afinfo); - return x; -} - static inline int xfrm_get_tos(struct flowi *fl, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1276,18 +1269,80 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (xdst->route == NULL) { + /* Dummy bundle - if it has xfrms we were not + * able to build bundle as template resolution failed. + * It means we need to try again resolving. */ + if (xdst->num_xfrms > 0) + return NULL; + } else { + /* Real bundle */ + if (stale_bundle(dst)) + return NULL; + } + + dst_hold(dst); + return flo; +} + +static int xfrm_bundle_flo_check(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + if (!xdst->route) + return 0; + if (stale_bundle(dst)) + return 0; + + return 1; +} + +static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) +{ + struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); + struct dst_entry *dst = &xdst->u.dst; + + dst_free(dst); +} + +static const struct flow_cache_ops xfrm_bundle_fc_ops = { + .get = xfrm_bundle_flo_get, + .check = xfrm_bundle_flo_check, + .delete = xfrm_bundle_flo_delete, +}; + +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); - + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); + xdst->flo.ops = &xfrm_bundle_fc_ops; + return xdst; } @@ -1308,7 +1363,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1317,13 +1373,14 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); return err; } + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -1333,6 +1390,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1344,6 +1402,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, int trailer_len = 0; int tos; int family = policy->selector.family; + xfrm_address_t saddr, daddr; + + xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); err = tos; @@ -1353,7 +1414,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -1374,7 +1435,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, family); + dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, + family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -1382,7 +1444,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); dst1->xfrm = xfrm[i]; - xdst->genid = xfrm[i]->genid; + xdst->xfrm_genid = xfrm[i]->genid; dst1->obsolete = -1; dst1->flags |= DST_HOST; @@ -1408,7 +1470,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbout for reachability confirmation */ + /* Copy neighbour for reachability confirmation */ dst0->neighbour = neigh_clone(dst->neighbour); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); @@ -1417,7 +1479,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; @@ -1475,263 +1537,348 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) #endif } -static int stale_bundle(struct dst_entry *dst); +static int xfrm_expand_policies(struct flowi *fl, u16 family, + struct xfrm_policy **pols, + int *num_pols, int *num_xfrms) +{ + int i; + + if (*num_pols == 0 || !pols[0]) { + *num_pols = 0; + *num_xfrms = 0; + return 0; + } + if (IS_ERR(pols[0])) + return PTR_ERR(pols[0]); + + *num_xfrms = pols[0]->xfrm_nr; + +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && + pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), + XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (IS_ERR(pols[1])) { + xfrm_pols_put(pols, *num_pols); + return PTR_ERR(pols[1]); + } + (*num_pols) ++; + (*num_xfrms) += pols[1]->xfrm_nr; + } + } +#endif + for (i = 0; i < *num_pols; i++) { + if (pols[i]->action != XFRM_POLICY_ALLOW) { + *num_xfrms = -1; + break; + } + } + + return 0; + +} + +static struct xfrm_dst * +xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, + struct flowi *fl, u16 family, + struct dst_entry *dst_orig) +{ + struct net *net = xp_net(pols[0]); + struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct dst_entry *dst; + struct xfrm_dst *xdst; + int err; + + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err < 0) { + if (err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + if (IS_ERR(dst)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); + return ERR_CAST(dst); + } + + xdst = (struct xfrm_dst *)dst; + xdst->num_xfrms = err; + if (num_pols > 1) + err = xfrm_dst_update_parent(dst, &pols[1]->selector); + else + err = xfrm_dst_update_origin(dst, fl); + if (unlikely(err)) { + dst_free(dst); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + return ERR_PTR(err); + } + + xdst->num_pols = num_pols; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + xdst->policy_genid = atomic_read(&pols[0]->genid); + + return xdst; +} + +static struct flow_cache_object * +xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, + struct flow_cache_object *oldflo, void *ctx) +{ + struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + struct xfrm_dst *xdst, *new_xdst; + int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + + /* Check if the policies from old bundle are usable */ + xdst = NULL; + if (oldflo) { + xdst = container_of(oldflo, struct xfrm_dst, flo); + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + pol_dead = 0; + for (i = 0; i < num_pols; i++) { + pols[i] = xdst->pols[i]; + pol_dead |= pols[i]->walk.dead; + } + if (pol_dead) { + dst_free(&xdst->u.dst); + xdst = NULL; + num_pols = 0; + num_xfrms = 0; + oldflo = NULL; + } + } + + /* Resolve policies to use if we couldn't get them from + * previous cache entry */ + if (xdst == NULL) { + num_pols = 1; + pols[0] = __xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; + } + + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + if (IS_ERR(new_xdst)) { + err = PTR_ERR(new_xdst); + if (err != -EAGAIN) + goto error; + if (oldflo == NULL) + goto make_dummy_bundle; + dst_hold(&xdst->u.dst); + return oldflo; + } + + /* Kill the previous bundle */ + if (xdst) { + /* The policies were stolen for newly generated bundle */ + xdst->num_pols = 0; + dst_free(&xdst->u.dst); + } + + /* Flow cache does not have reference, it dst_free()'s, + * but we do need to return one reference for original caller */ + dst_hold(&new_xdst->u.dst); + return &new_xdst->flo; + +make_dummy_bundle: + /* We found policies, but there's no bundles to instantiate: + * either because the policy blocks, has no transformations or + * we could not build template (no xfrm_states).*/ + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + return ERR_CAST(xdst); + } + xdst->num_pols = num_pols; + xdst->num_xfrms = num_xfrms; + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + + dst_hold(&xdst->u.dst); + return &xdst->flo; + +inc_error: + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +error: + if (xdst != NULL) + dst_free(&xdst->u.dst); + else + xfrm_pols_put(pols, num_pols); + return ERR_PTR(err); +} /* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - struct xfrm_policy *policy; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - int npols; - int pol_dead; - int xfrm_nr; - int pi; - struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct dst_entry *dst, *dst_orig = *dst_p; - int nx = 0; - int err; - u32 genid; - u16 family; + struct flow_cache_object *flo; + struct xfrm_dst *xdst; + struct dst_entry *dst, *dst_orig = *dst_p, *route; + u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + int i, err, num_pols, num_xfrms = 0, drop_pols = 0; restart: - genid = atomic_read(&flow_cache_genid); - policy = NULL; - for (pi = 0; pi < ARRAY_SIZE(pols); pi++) - pols[pi] = NULL; - npols = 0; - pol_dead = 0; - xfrm_nr = 0; + dst = NULL; + xdst = NULL; + route = NULL; if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + num_pols = 1; + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + err = xfrm_expand_policies(fl, family, pols, + &num_pols, &num_xfrms); + if (err < 0) goto dropdst; + + if (num_pols) { + if (num_xfrms <= 0) { + drop_pols = num_pols; + goto no_transform; + } + + xdst = xfrm_resolve_and_create_bundle( + pols, num_pols, fl, + family, dst_orig); + if (IS_ERR(xdst)) { + xfrm_pols_put(pols, num_pols); + err = PTR_ERR(xdst); + goto dropdst; + } + + spin_lock_bh(&xfrm_policy_sk_bundle_lock); + xdst->u.dst.next = xfrm_policy_sk_bundles; + xfrm_policy_sk_bundles = &xdst->u.dst; + spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + + route = xdst->route; } } - if (!policy) { + if (xdst == NULL) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, - dir, xfrm_policy_lookup); - err = PTR_ERR(policy); - if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + flo = flow_cache_lookup(net, fl, family, dir, + xfrm_bundle_lookup, dst_orig); + if (flo == NULL) + goto nopol; + if (IS_ERR(flo)) { + err = PTR_ERR(flo); goto dropdst; } + xdst = container_of(flo, struct xfrm_dst, flo); + + num_pols = xdst->num_pols; + num_xfrms = xdst->num_xfrms; + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); + route = xdst->route; + } + + dst = &xdst->u.dst; + if (route == NULL && num_xfrms > 0) { + /* The only case when xfrm_bundle_lookup() returns a + * bundle with null route, is when the template could + * not be resolved. It means policies are there, but + * bundle could not be created, since we don't yet + * have the xfrm_state's. We need to wait for KM to + * negotiate new SA's or bail out with error.*/ + if (net->xfrm.sysctl_larval_drop) { + /* EREMOTE tells the caller to generate + * a one-shot blackhole route. */ + dst_release(dst); + xfrm_pols_put(pols, drop_pols); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + return -EREMOTE; + } + if (flags & XFRM_LOOKUP_WAIT) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&net->xfrm.km_waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&net->xfrm.km_waitq, &wait); + + if (!signal_pending(current)) { + dst_release(dst); + goto restart; + } + + err = -ERESTART; + } else + err = -EAGAIN; + + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); + goto error; } - if (!policy) +no_transform: + if (num_pols == 0) goto nopol; - family = dst_orig->ops->family; - pols[0] = policy; - npols ++; - xfrm_nr += pols[0]->xfrm_nr; - - err = -ENOENT; - if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) + if ((flags & XFRM_LOOKUP_ICMP) && + !(pols[0]->flags & XFRM_POLICY_ICMP)) { + err = -ENOENT; goto error; + } - policy->curlft.use_time = get_seconds(); + for (i = 0; i < num_pols; i++) + pols[i]->curlft.use_time = get_seconds(); - switch (policy->action) { - default: - case XFRM_POLICY_BLOCK: + if (num_xfrms < 0) { /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; - - case XFRM_POLICY_ALLOW: -#ifndef CONFIG_XFRM_SUB_POLICY - if (policy->xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; - } -#endif - - /* Try to find matching bundle. - * - * LATER: help from flow cache. It is optional, this - * is required only for output policy. - */ - dst = xfrm_find_bundle(fl, policy, family); - if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = PTR_ERR(dst); - goto error; - } - - if (dst) - break; - -#ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, - fl, family, - XFRM_POLICY_OUT); - if (pols[1]) { - if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); - err = PTR_ERR(pols[1]); - goto error; - } - if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); - err = -EPERM; - goto error; - } - npols ++; - xfrm_nr += pols[1]->xfrm_nr; - } - } - - /* - * Because neither flowi nor bundle information knows about - * transformation template size. On more than one policy usage - * we can realize whether all of them is bypass or not after - * they are searched. See above not-transformed bypass - * is surrounded by non-sub policy configuration, too. - */ - if (xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } - -#endif - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - - if (unlikely(nx<0)) { - err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { - /* EREMOTE tells the caller to generate - * a one-shot blackhole route. - */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - xfrm_pol_put(policy); - return -EREMOTE; - } - if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); - - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - - if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - err = -ERESTART; - goto error; - } - if (nx == -EAGAIN || - genid != atomic_read(&flow_cache_genid)) { - xfrm_pols_put(pols, npols); - goto restart; - } - err = nx; - } - if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); - goto error; - } - } - if (nx == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } - - dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); - err = PTR_ERR(dst); - if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); - goto error; - } - - for (pi = 0; pi < npols; pi++) { - read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; - read_unlock_bh(&pols[pi]->lock); - } - - write_lock_bh(&policy->lock); - if (unlikely(pol_dead || stale_bundle(dst))) { - /* Wow! While we worked on resolving, this - * policy has gone. Retry. It is not paranoia, - * we just cannot enlist new bundle to dead object. - * We can't enlist stable bundles either. - */ - write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); - - if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); - else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - err = -EHOSTUNREACH; - goto error; - } - - if (npols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - goto error; - } - - dst->next = policy->bundles; - policy->bundles = dst; - dst_hold(dst); - write_unlock_bh(&policy->lock); + } else if (num_xfrms > 0) { + /* Flow transformed */ + *dst_p = dst; + dst_release(dst_orig); + } else { + /* Flow passes untransformed */ + dst_release(dst); } - *dst_p = dst; - dst_release(dst_orig); - xfrm_pols_put(pols, npols); +ok: + xfrm_pols_put(pols, drop_pols); return 0; +nopol: + if (!(flags & XFRM_LOOKUP_ICMP)) + goto ok; + err = -ENOENT; error: - xfrm_pols_put(pols, npols); + dst_release(dst); dropdst: dst_release(dst_orig); *dst_p = NULL; + xfrm_pols_put(pols, drop_pols); return err; - -nopol: - err = -ENOENT; - if (flags & XFRM_LOOKUP_ICMP) - goto dropdst; - return 0; } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1772,7 +1919,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - ((tmpl->aalgos & (1<props.aalgo)) || + (tmpl->allalgs || (tmpl->aalgos & (1<props.aalgo)) || !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)); @@ -1839,6 +1986,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1854,7 +2002,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1867,7 +2015,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1877,24 +2025,31 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } - if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, - xfrm_policy_lookup); + if (!pol) { + struct flow_cache_object *flo; + + flo = flow_cache_lookup(net, &fl, family, fl_dir, + xfrm_policy_lookup, NULL); + if (IS_ERR_OR_NULL(flo)) + pol = ERR_CAST(flo); + else + pol = container_of(flo, struct xfrm_policy, flo); + } if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1906,12 +2061,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); @@ -1935,11 +2090,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -1963,20 +2118,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -1988,15 +2143,21 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; + struct dst_entry *dst; + int res; if (xfrm_decode_session(skb, &fl, family) < 0) { - /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + dst = skb_dst(skb); + + res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; + skb_dst_set(skb, dst); + return res; } EXPORT_SYMBOL(__xfrm_route_forward); @@ -2038,7 +2199,7 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = dev->nd_net->loopback_dev; + dst->dev = dev_net(dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); } @@ -2062,69 +2223,22 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +static void __xfrm_garbage_collect(struct net *net) { - struct dst_entry *dst, **dstp; + struct dst_entry *head, *next; - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = *gc_list_p; - *gc_list_p = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); -} + flow_cache_flush(); -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) -{ - struct dst_entry *gc_list = NULL; - int dir; + spin_lock_bh(&xfrm_policy_sk_bundle_lock); + head = xfrm_policy_sk_bundles; + xfrm_policy_sk_bundles = NULL; + spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - struct xfrm_policy *pol; - struct hlist_node *entry; - struct hlist_head *table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) - prune_one_bundle(pol, func, &gc_list); - - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) - prune_one_bundle(pol, func, &gc_list); - } + while (head) { + next = head->next; + dst_free(head); + head = next; } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - struct dst_entry *dst = gc_list; - gc_list = dst->next; - dst_free(dst); - } -} - -static int unused_bundle(struct dst_entry *dst) -{ - return !atomic_read(&dst->__refcnt); -} - -static void __xfrm_garbage_collect(void) -{ - xfrm_prune_bundles(unused_bundle); -} - -static int xfrm_flush_bundles(void) -{ - xfrm_prune_bundles(stale_bundle); - return 0; } static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2184,7 +2298,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; - if (xdst->genid != dst->xfrm->genid) + if (xdst->xfrm_genid != dst->xfrm->genid) + return 0; + if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; if (strict && fl && @@ -2235,6 +2351,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2258,6 +2375,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2288,6 +2426,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2309,38 +2463,54 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + __xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { - xfrm_dev_event, - NULL, - 0 + .notifier_call = xfrm_dev_event, }; #ifdef CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, + int rv; + + if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif -static void __init xfrm_policy_init(void) +static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); @@ -2348,38 +2518,124 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_hash_alloc(sz); - xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; + net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + net->xfrm.policy_count[dir] = 0; + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); - htab = &xfrm_policy_bydst[dir]; + htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); + goto out_bydst; + htab->hmask = hmask; } - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); - register_netdevice_notifier(&xfrm_dev_notifier); + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; + +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); +out_byidx: + return -ENOMEM; } -void __init xfrm_init(void) +static void xfrm_policy_fini(struct net *net) { -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); + struct xfrm_audit audit_info; + unsigned int sz; + int dir; + + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - xfrm_state_init(); - xfrm_policy_init(); + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); + } + + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); +} + +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + xfrm_dst_ops_init(net); + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; + return 0; + +out_sysctl: + xfrm_policy_fini(net); +out_policy: + xfrm_state_fini(net); +out_state: + xfrm_statistics_fini(net); +out_statistics: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + xfrm_sysctl_fini(net); + xfrm_policy_fini(net); + xfrm_state_fini(net); + xfrm_statistics_fini(net); +} + +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + +void __init xfrm_init(void) +{ + register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL @@ -2395,25 +2651,21 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, switch(sel->family) { case AF_INET: - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, - NIPQUAD(sel->saddr.a4)); + audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIPQUAD_FMT, - NIPQUAD(sel->daddr.a4)); + audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); if (sel->prefixlen_d != 32) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; case AF_INET6: - audit_log_format(audit_buf, " src=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->saddr.a6)); + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->daddr.a6)); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); @@ -2422,14 +2674,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -2437,14 +2689,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -2483,7 +2735,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -2492,7 +2744,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &init_net.xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && @@ -2544,11 +2796,10 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, struct xfrm_migrate *m, int num_migrate) { struct xfrm_migrate *mp; - struct dst_entry *dst; int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; @@ -2569,10 +2820,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, sizeof(pol->xfrm_vec[i].saddr)); pol->xfrm_vec[i].encap_family = mp->new_family; /* flush bundles */ - while ((dst = pol->bundles) != NULL) { - pol->bundles = dst->next; - dst_free(dst); - } + atomic_inc(&pol->genid); } } @@ -2619,7 +2867,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2663,7 +2912,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol);