/* netns xfrm: per-netns xfrm_policy_bydst hash
 * [safe/jmp/linux-2.6] / net / xfrm / xfrm_policy.c
 */
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
/* When set, packets matching a larval (still-resolving) state are dropped
 * instead of being queued. */
int sysctl_xfrm_larval_drop __read_mostly = 1;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

/* Serializes xfrm configuration changes (netlink/pfkey). */
DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

/* Protects the policy hash tables, xfrm_policy_count and policy_all list. */
static DEFINE_RWLOCK(xfrm_policy_lock);

/* Per-direction policy counts; slots [dir + XFRM_POLICY_MAX] are reported
 * as the *scnt fields by xfrm_spd_getinfo(). */
unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

/* Dead policies queued for deferred teardown (see xfrm_policy_kill()). */
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
63
64 static inline int
65 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
66 {
67         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
68                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
69                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
70                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
71                 (fl->proto == sel->proto || !sel->proto) &&
72                 (fl->oif == sel->ifindex || !sel->ifindex);
73 }
74
75 static inline int
76 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
77 {
78         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
79                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
80                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
81                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
82                 (fl->proto == sel->proto || !sel->proto) &&
83                 (fl->oif == sel->ifindex || !sel->ifindex);
84 }
85
86 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
87                     unsigned short family)
88 {
89         switch (family) {
90         case AF_INET:
91                 return __xfrm4_selector_match(sel, fl);
92         case AF_INET6:
93                 return __xfrm6_selector_match(sel, fl);
94         }
95         return 0;
96 }
97
98 static inline struct dst_entry *__xfrm_dst_lookup(int tos,
99                                                   xfrm_address_t *saddr,
100                                                   xfrm_address_t *daddr,
101                                                   int family)
102 {
103         struct xfrm_policy_afinfo *afinfo;
104         struct dst_entry *dst;
105
106         afinfo = xfrm_policy_get_afinfo(family);
107         if (unlikely(afinfo == NULL))
108                 return ERR_PTR(-EAFNOSUPPORT);
109
110         dst = afinfo->dst_lookup(tos, saddr, daddr);
111
112         xfrm_policy_put_afinfo(afinfo);
113
114         return dst;
115 }
116
/* Look up a route for state @x.  Types flagged with a care-of address
 * (XFRM_TYPE_LOCAL_COADDR / XFRM_TYPE_REMOTE_COADDR) route via x->coaddr
 * instead of the state's own endpoint.  On success the addresses actually
 * used are copied back into prev_saddr/prev_daddr so subsequent transforms
 * in the bundle continue from them. */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		/* Propagate the substituted addresses to the caller. */
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}
146
147 static inline unsigned long make_jiffies(long secs)
148 {
149         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
150                 return MAX_SCHEDULE_TIMEOUT-1;
151         else
152                 return secs*HZ;
153 }
154
/* Per-policy lifetime timer.  Computes the nearest soft/hard deadline in
 * seconds relative to add/use time, notifies the key manager on soft
 * expiry, deletes the policy on hard expiry, and otherwise re-arms itself
 * for the next deadline.  Entered with one reference held (taken when the
 * timer was armed); it is dropped here via xfrm_pol_put(). */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->walk.dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		/* Fall back to add_time if the policy was never used. */
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			/* Soft limit passed: warn once now, poll again in
			 * XFRM_KM_TIMEOUT seconds. */
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	/* mod_timer() returning 0 means the timer was inactive, so arming
	 * it must take a fresh reference for the next callback. */
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	/* Hard expiry: remove the policy and notify with hard == 1. */
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}
224
225
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 *
 * Returns a zeroed policy with one reference held by the caller, tagged
 * with the owning namespace, or NULL on allocation failure. */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		/* Lifetime timer; a reference is taken only when armed
		 * (see xfrm_policy_insert() / xfrm_policy_timer()). */
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
249
/* Destroy xfrm_policy: descendant resources must be released to this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	/* Callers must have marked the policy dead and drained its cached
	 * bundles before the last reference is dropped. */
	BUG_ON(!policy->walk.dead);

	BUG_ON(policy->bundles);

	/* A still-pending timer would mean an outstanding reference, so
	 * reaching here with one is a refcounting bug. */
	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
265
/* Teardown of one dead policy from the gc work: free its cached bundles,
 * disarm the timer (dropping the timer's reference if it was pending),
 * flush the flow cache if other references remain so their holders let
 * go, then drop the gc reference. */
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}
283
/* Workqueue handler: destroy all policies queued by xfrm_policy_kill(). */
static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	/* Steal the whole pending list in one shot so new deaths can keep
	 * queueing while this batch is destroyed. */
	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	/* Dead policies are chained through their (already unhashed)
	 * bydst node; see xfrm_policy_kill(). */
	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}
static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
299
/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->walk.dead;
	policy->walk.dead = 1;
	write_unlock_bh(&policy->lock);

	/* Killing an already-dead policy indicates a caller bug. */
	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	/* Reuse the bydst node (unhashed by the caller) to queue the
	 * policy for deferred teardown on the gc workqueue. */
	spin_lock_bh(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}
324
/* Upper bound on policy hash table size; resizing stops at this many buckets. */
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

/* Hash a policy index into init_net's by-index table. */
static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, init_net.xfrm.policy_idx_hmask);
}
331
/* Return the hash chain for @sel in direction @dir.  __sel_hash() uses
 * hmask + 1 as a sentinel for selectors it cannot hash, which selects the
 * per-direction inexact list instead of a table bucket. */
static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&init_net.xfrm.policy_inexact[dir] :
		init_net.xfrm.policy_bydst[dir].table + hash);
}
341
342 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
343 {
344         unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
345         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
346
347         return init_net.xfrm.policy_bydst[dir].table + hash;
348 }
349
/* Move every policy on @list into @ndsttable (indexed with @nhashmask),
 * preserving the relative order of entries that land in the same new
 * bucket: the first such entry is prepended, each subsequent one is
 * chained right after the previous (entry0).  Entries hashing to a
 * different bucket are left for a later redo pass, so the list drains
 * one target bucket at a time. */
static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			/* First entry of this pass fixes the target bucket. */
			hlist_del(entry);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			/* Only same-bucket entries move in this pass; keep
			 * them in original order behind the previous one. */
			if (h != h0)
				continue;
			hlist_del(entry);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = entry;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}
381
/* Rehash every policy on @list into @nidxtable by index.  No hlist_del()
 * is needed: the entire old table is discarded by the caller, and the
 * _safe iterator has already cached the next node before each entry is
 * re-linked into the new table. */
static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}
396
/* Grow a hash mask geometrically: mask 2^k - 1 becomes 2^(k+1) - 1,
 * i.e. the bucket count doubles. */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int nslots = (old_hmask + 1) * 2;

	return nslots - 1;
}
401
/* Double the bydst hash table for direction @dir.  The new table is
 * allocated outside the lock; on allocation failure the old table simply
 * stays in use.  The old table is freed after the swap. */
static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = init_net.xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	init_net.xfrm.policy_bydst[dir].table = ndst;
	init_net.xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
426
/* Double the shared by-index hash table; same alloc-outside-lock pattern
 * as xfrm_bydst_resize().  @total (the aggregate policy count) is unused
 * here beyond having justified the resize. */
static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = init_net.xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = init_net.xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	init_net.xfrm.policy_byidx = nidx;
	init_net.xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
451
452 static inline int xfrm_bydst_should_resize(int dir, int *total)
453 {
454         unsigned int cnt = xfrm_policy_count[dir];
455         unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
456
457         if (total)
458                 *total += cnt;
459
460         if ((hmask + 1) < xfrm_policy_hashmax &&
461             cnt > hmask)
462                 return 1;
463
464         return 0;
465 }
466
467 static inline int xfrm_byidx_should_resize(int total)
468 {
469         unsigned int hmask = init_net.xfrm.policy_idx_hmask;
470
471         if ((hmask + 1) < xfrm_policy_hashmax &&
472             total > hmask)
473                 return 1;
474
475         return 0;
476 }
477
/* Fill @si with SPD statistics; taken under the policy lock so the
 * counters form a consistent snapshot. */
void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	/* Upper half of xfrm_policy_count holds the *s (per-socket) slots. */
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = init_net.xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
492
static DEFINE_MUTEX(hash_resize_mutex);

/* Deferred (workqueue) hash growth: grow each direction's bydst table
 * that has outgrown its buckets, then the shared byidx table if the
 * aggregate count warrants it.  The mutex serializes overlapping
 * schedulings of this work item. */
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
512
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(int dir)
{
	/* Monotonic generator stepping by 8 so the low 3 bits can carry the
	 * direction (recovered via xfrm_policy_id2dir()).  Called from
	 * xfrm_policy_insert() under xfrm_policy_lock, which protects this
	 * static state. */
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		/* 0 would be ambiguous with "no index"; skip it after
		 * wraparound. */
		if (idx == 0)
			idx = 8;
		list = init_net.xfrm.policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		/* Retry until an unused index is produced. */
		if (!found)
			return idx;
	}
}
542
543 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
544 {
545         u32 *p1 = (u32 *) s1;
546         u32 *p2 = (u32 *) s2;
547         int len = sizeof(struct xfrm_selector) / sizeof(u32);
548         int i;
549
550         for (i = 0; i < len; i++) {
551                 if (p1[i] != p2[i])
552                         return 1;
553         }
554
555         return 0;
556 }
557
/* Insert @policy into the SPD for direction @dir.  An existing policy
 * with the same type, selector and security context is replaced (delpol),
 * or -EEXIST is returned when @excl is set.  Chains stay sorted by
 * ascending priority.  Afterwards, cached bundles of all policies that
 * follow the new entry on its chain are released, since the new policy
 * may now shadow them. */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			/* Track the last entry to insert after so the chain
			 * stays sorted by priority. */
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	/* Invalidate flow cache entries resolved against the old SPD. */
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		list_del(&delpol->walk.all);
		xfrm_policy_count[dir]--;
	}
	/* A replacement inherits its predecessor's index so userspace
	 * handles remain valid. */
	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
	hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	/* Arm the lifetime timer; taking the timer's reference when it
	 * was previously inactive. */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &init_net.xfrm.policy_all);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	/* Collect cached bundles of every policy after the new one on its
	 * chain under the read lock, then free them outside any lock. */
	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
647
/* Find the policy matching @type, @sel and security context @ctx in
 * direction @dir.  Returns it with a reference held, or NULL.  With
 * @delete set, the policy is unlinked and killed unless the LSM vetoes
 * the delete, in which case *err is set and the (still linked) policy is
 * returned. */
struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	/* Kill outside the lock; the flow cache generation bump forces
	 * cached lookups to re-resolve. */
	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
691
/* Find the policy with index @id and type @type via the by-index hash.
 * Same contract as xfrm_policy_bysel_ctx(): reference held on return;
 * with @delete, the policy is unlinked and killed unless the LSM vetoes
 * it (then *err is set and the linked policy returned). */
struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	/* The low bits of a policy index encode its direction; reject a
	 * mismatched id/dir pair up front. */
	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = init_net.xfrm.policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				list_del(&pol->walk.all);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
735
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight for xfrm_policy_flush(): ask the LSM for permission to
 * delete every policy of @type (inexact lists first, then all hash
 * chains).  On the first refusal the failure is audited and the error
 * returned, so the flush is all-or-nothing.  Called with
 * xfrm_policy_lock held. */
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &init_net.xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     init_net.xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
/* Without CONFIG_SECURITY_NETWORK_XFRM there is nothing to veto. */
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif
787
788 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
789 {
790         int dir, err = 0;
791
792         write_lock_bh(&xfrm_policy_lock);
793
794         err = xfrm_policy_flush_secctx_check(type, audit_info);
795         if (err)
796                 goto out;
797
798         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
799                 struct xfrm_policy *pol;
800                 struct hlist_node *entry;
801                 int i, killed;
802
803                 killed = 0;
804         again1:
805                 hlist_for_each_entry(pol, entry,
806                                      &init_net.xfrm.policy_inexact[dir], bydst) {
807                         if (pol->type != type)
808                                 continue;
809                         hlist_del(&pol->bydst);
810                         hlist_del(&pol->byidx);
811                         write_unlock_bh(&xfrm_policy_lock);
812
813                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
814                                                  audit_info->sessionid,
815                                                  audit_info->secid);
816
817                         xfrm_policy_kill(pol);
818                         killed++;
819
820                         write_lock_bh(&xfrm_policy_lock);
821                         goto again1;
822                 }
823
824                 for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
825         again2:
826                         hlist_for_each_entry(pol, entry,
827                                              init_net.xfrm.policy_bydst[dir].table + i,
828                                              bydst) {
829                                 if (pol->type != type)
830                                         continue;
831                                 hlist_del(&pol->bydst);
832                                 hlist_del(&pol->byidx);
833                                 list_del(&pol->walk.all);
834                                 write_unlock_bh(&xfrm_policy_lock);
835
836                                 xfrm_audit_policy_delete(pol, 1,
837                                                          audit_info->loginuid,
838                                                          audit_info->sessionid,
839                                                          audit_info->secid);
840                                 xfrm_policy_kill(pol);
841                                 killed++;
842
843                                 write_lock_bh(&xfrm_policy_lock);
844                                 goto again2;
845                         }
846                 }
847
848                 xfrm_policy_count[dir] -= killed;
849         }
850         atomic_inc(&flow_cache_genid);
851 out:
852         write_unlock_bh(&xfrm_policy_lock);
853         return err;
854 }
855 EXPORT_SYMBOL(xfrm_policy_flush);
856
/* Resumable dump over all policies matching walk->type.  @func is called
 * for every live policy; a non-zero return pauses the walk and parks the
 * walker entry in policy_all at the current position so a later call
 * resumes from there.  A completed walk unlinks the walker; a fresh walk
 * that visits nothing returns -ENOENT. */
int xfrm_policy_walk(struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	/* An unlinked walker with non-zero seq has already finished. */
	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) {
		/* Skip dead entries (including other parked walkers, which
		 * are created with dead set). */
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			/* Park the walker right after the current entry. */
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
902
903 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
904 {
905         INIT_LIST_HEAD(&walk->walk.all);
906         walk->walk.dead = 1;
907         walk->type = type;
908         walk->seq = 0;
909 }
910 EXPORT_SYMBOL(xfrm_policy_walk_init);
911
912 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
913 {
914         if (list_empty(&walk->walk.all))
915                 return;
916
917         write_lock_bh(&xfrm_policy_lock);
918         list_del(&walk->walk.all);
919         write_unlock_bh(&xfrm_policy_lock);
920 }
921 EXPORT_SYMBOL(xfrm_policy_walk_done);
922
923 /*
924  * Find policy to apply to this flow.
925  *
926  * Returns 0 if policy found, else an -errno.
927  */
928 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
929                              u8 type, u16 family, int dir)
930 {
931         struct xfrm_selector *sel = &pol->selector;
932         int match, ret = -ESRCH;
933
934         if (pol->family != family ||
935             pol->type != type)
936                 return ret;
937
938         match = xfrm_selector_match(sel, fl, family);
939         if (match)
940                 ret = security_xfrm_policy_lookup(pol->security, fl->secid,
941                                                   dir);
942
943         return ret;
944 }
945
/* Find the best policy of @type for flow @fl in direction @dir.
 *
 * Two passes: first the exact chain hashed by daddr/saddr, then the
 * inexact chain, keeping whichever candidate has the numerically lowest
 * (i.e. strongest) priority.  Returns a held policy, NULL when nothing
 * matches, or an ERR_PTR on a hard security-hook failure.
 */
static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;	/* "no candidate yet": beats nothing */

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;	/* no match, keep scanning */
			else {
				ret = ERR_PTR(err);	/* hard error wins */
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	/* An inexact policy can still beat the exact hit on priority. */
	chain = &init_net.xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);	/* the caller owns a reference */
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
1001
/* Resolver callback for the flow cache: look up the policy for @fl.
 *
 * With CONFIG_XFRM_SUB_POLICY the SUB database is consulted first and
 * MAIN only as a fallback.  On return *objp holds a referenced policy
 * (or NULL for "no policy") and, when non-NULL, *obj_refp points at its
 * refcount so the flow cache can release it later.  Returns 0 or the
 * negative errno from the lookup.
 */
static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}
1029
1030 static inline int policy_to_flow_dir(int dir)
1031 {
1032         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1033             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1034             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1035                 return dir;
1036         switch (dir) {
1037         default:
1038         case XFRM_POLICY_IN:
1039                 return FLOW_DIR_IN;
1040         case XFRM_POLICY_OUT:
1041                 return FLOW_DIR_OUT;
1042         case XFRM_POLICY_FWD:
1043                 return FLOW_DIR_FWD;
1044         }
1045 }
1046
/* Look up the per-socket policy for direction @dir, if any.
 *
 * Returns a held policy when the socket policy's selector matches @fl
 * and the LSM permits it, NULL when there is no usable policy, or an
 * ERR_PTR for a hard security failure.
 */
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol->security,
						      fl->secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);	/* caller owns a ref */
			else if (err == -ESRCH)
				pol = NULL;	/* LSM: not for this flow */
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}
1073
/* Insert @pol into the global tables (walk list, bydst and byidx
 * hashes) for direction @dir and take a reference on it.
 * Caller must hold xfrm_policy_lock for writing.
 */
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &init_net.xfrm.policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	/* Grow the bydst hash asynchronously when it gets crowded. */
	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}
1088
1089 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1090                                                 int dir)
1091 {
1092         if (hlist_unhashed(&pol->bydst))
1093                 return NULL;
1094
1095         hlist_del(&pol->bydst);
1096         hlist_del(&pol->byidx);
1097         list_del(&pol->walk.all);
1098         xfrm_policy_count[dir]--;
1099
1100         return pol;
1101 }
1102
1103 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1104 {
1105         write_lock_bh(&xfrm_policy_lock);
1106         pol = __xfrm_policy_unlink(pol, dir);
1107         write_unlock_bh(&xfrm_policy_lock);
1108         if (pol) {
1109                 if (dir < XFRM_POLICY_MAX)
1110                         atomic_inc(&flow_cache_genid);
1111                 xfrm_policy_kill(pol);
1112                 return 0;
1113         }
1114         return -ENOENT;
1115 }
1116 EXPORT_SYMBOL(xfrm_policy_delete);
1117
/* Install @pol as the per-socket policy for direction @dir, replacing
 * (and killing) any previous one.  Socket policies are linked into the
 * tables at slot XFRM_POLICY_MAX + dir, past the system directions.
 * Returns 0, or -EINVAL when a non-MAIN policy is supplied with
 * sub-policies enabled.
 */
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		/* Unlink under the lock; the kill happens outside it. */
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}
1144
/* Duplicate a socket policy for a newly cloned socket and link the copy
 * in at XFRM_POLICY_MAX + dir.  Returns the new policy (the tables hold
 * their own reference) or NULL on allocation/LSM-clone failure.
 */
static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			/* NOTE(review): raw kfree() here assumes nothing in
			 * newp (e.g. its timer) has been armed yet by
			 * xfrm_policy_alloc() — confirm. */
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		/* Drop the allocation reference; __xfrm_policy_link took
		 * one for the tables. */
		xfrm_pol_put(newp);
	}
	return newp;
}
1172
1173 int __xfrm_sk_clone_policy(struct sock *sk)
1174 {
1175         struct xfrm_policy *p0 = sk->sk_policy[0],
1176                            *p1 = sk->sk_policy[1];
1177
1178         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1179         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1180                 return -ENOMEM;
1181         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1182                 return -ENOMEM;
1183         return 0;
1184 }
1185
1186 static int
1187 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1188                unsigned short family)
1189 {
1190         int err;
1191         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1192
1193         if (unlikely(afinfo == NULL))
1194                 return -EINVAL;
1195         err = afinfo->get_saddr(local, remote);
1196         xfrm_policy_put_afinfo(afinfo);
1197         return err;
1198 }
1199
1200 /* Resolve list of templates for the flow, given policy. */
1201
/* Resolve @policy's template list for flow @fl into concrete states.
 *
 * Walks policy->xfrm_vec in order; for tunnel/BEET templates the
 * endpoint addresses (and family) come from the template itself, with a
 * routing source-address lookup when the template leaves the source
 * unspecified.  Fills @xfrm with held states and returns their count,
 * or a negative errno after releasing any states already acquired.
 */
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				/* Template has no source: ask routing. */
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* The next hop starts at this tunnel's endpoints. */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}
		else if (error == -ESRCH)
			error = -EAGAIN;	/* larval: caller may wait/retry */

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	/* Drop everything acquired so far, newest first. */
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
1258
/* Resolve the templates of up to @npols policies into @xfrm.
 *
 * With more than one policy the states are first gathered into a
 * temporary array and then sorted into outbound processing order.
 * Returns the total state count, or a negative errno after releasing
 * all acquired states.
 */
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;	/* sort staging only if needed */
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
1297
1298 /* Check that the bundle accepts the flow and its components are
1299  * still valid.
1300  */
1301
1302 static struct dst_entry *
1303 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1304 {
1305         struct dst_entry *x;
1306         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1307         if (unlikely(afinfo == NULL))
1308                 return ERR_PTR(-EINVAL);
1309         x = afinfo->find_bundle(fl, policy);
1310         xfrm_policy_put_afinfo(afinfo);
1311         return x;
1312 }
1313
1314 static inline int xfrm_get_tos(struct flowi *fl, int family)
1315 {
1316         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1317         int tos;
1318
1319         if (!afinfo)
1320                 return -EINVAL;
1321
1322         tos = afinfo->get_tos(fl);
1323
1324         xfrm_policy_put_afinfo(afinfo);
1325
1326         return tos;
1327 }
1328
1329 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1330 {
1331         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1332         struct xfrm_dst *xdst;
1333
1334         if (!afinfo)
1335                 return ERR_PTR(-EINVAL);
1336
1337         xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1338
1339         xfrm_policy_put_afinfo(afinfo);
1340
1341         return xdst;
1342 }
1343
1344 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1345                                  int nfheader_len)
1346 {
1347         struct xfrm_policy_afinfo *afinfo =
1348                 xfrm_policy_get_afinfo(dst->ops->family);
1349         int err;
1350
1351         if (!afinfo)
1352                 return -EINVAL;
1353
1354         err = afinfo->init_path(path, dst, nfheader_len);
1355
1356         xfrm_policy_put_afinfo(afinfo);
1357
1358         return err;
1359 }
1360
1361 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1362 {
1363         struct xfrm_policy_afinfo *afinfo =
1364                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1365         int err;
1366
1367         if (!afinfo)
1368                 return -EINVAL;
1369
1370         err = afinfo->fill_dst(xdst, dev);
1371
1372         xfrm_policy_put_afinfo(afinfo);
1373
1374         return err;
1375 }
1376
1377 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1378  * all the metrics... Shortly, bundle a bundle.
1379  */
1380
1381 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1382                                             struct xfrm_state **xfrm, int nx,
1383                                             struct flowi *fl,
1384                                             struct dst_entry *dst)
1385 {
1386         unsigned long now = jiffies;
1387         struct net_device *dev;
1388         struct dst_entry *dst_prev = NULL;
1389         struct dst_entry *dst0 = NULL;
1390         int i = 0;
1391         int err;
1392         int header_len = 0;
1393         int nfheader_len = 0;
1394         int trailer_len = 0;
1395         int tos;
1396         int family = policy->selector.family;
1397         xfrm_address_t saddr, daddr;
1398
1399         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1400
1401         tos = xfrm_get_tos(fl, family);
1402         err = tos;
1403         if (tos < 0)
1404                 goto put_states;
1405
1406         dst_hold(dst);
1407
1408         for (; i < nx; i++) {
1409                 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1410                 struct dst_entry *dst1 = &xdst->u.dst;
1411
1412                 err = PTR_ERR(xdst);
1413                 if (IS_ERR(xdst)) {
1414                         dst_release(dst);
1415                         goto put_states;
1416                 }
1417
1418                 if (!dst_prev)
1419                         dst0 = dst1;
1420                 else {
1421                         dst_prev->child = dst_clone(dst1);
1422                         dst1->flags |= DST_NOHASH;
1423                 }
1424
1425                 xdst->route = dst;
1426                 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1427
1428                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1429                         family = xfrm[i]->props.family;
1430                         dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1431                                               family);
1432                         err = PTR_ERR(dst);
1433                         if (IS_ERR(dst))
1434                                 goto put_states;
1435                 } else
1436                         dst_hold(dst);
1437
1438                 dst1->xfrm = xfrm[i];
1439                 xdst->genid = xfrm[i]->genid;
1440
1441                 dst1->obsolete = -1;
1442                 dst1->flags |= DST_HOST;
1443                 dst1->lastuse = now;
1444
1445                 dst1->input = dst_discard;
1446                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1447
1448                 dst1->next = dst_prev;
1449                 dst_prev = dst1;
1450
1451                 header_len += xfrm[i]->props.header_len;
1452                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1453                         nfheader_len += xfrm[i]->props.header_len;
1454                 trailer_len += xfrm[i]->props.trailer_len;
1455         }
1456
1457         dst_prev->child = dst;
1458         dst0->path = dst;
1459
1460         err = -ENODEV;
1461         dev = dst->dev;
1462         if (!dev)
1463                 goto free_dst;
1464
1465         /* Copy neighbout for reachability confirmation */
1466         dst0->neighbour = neigh_clone(dst->neighbour);
1467
1468         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1469         xfrm_init_pmtu(dst_prev);
1470
1471         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1472                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1473
1474                 err = xfrm_fill_dst(xdst, dev);
1475                 if (err)
1476                         goto free_dst;
1477
1478                 dst_prev->header_len = header_len;
1479                 dst_prev->trailer_len = trailer_len;
1480                 header_len -= xdst->u.dst.xfrm->props.header_len;
1481                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1482         }
1483
1484 out:
1485         return dst0;
1486
1487 put_states:
1488         for (; i < nx; i++)
1489                 xfrm_state_put(xfrm[i]);
1490 free_dst:
1491         if (dst0)
1492                 dst_free(dst0);
1493         dst0 = ERR_PTR(err);
1494         goto out;
1495 }
1496
1497 static int inline
1498 xfrm_dst_alloc_copy(void **target, void *src, int size)
1499 {
1500         if (!*target) {
1501                 *target = kmalloc(size, GFP_ATOMIC);
1502                 if (!*target)
1503                         return -ENOMEM;
1504         }
1505         memcpy(*target, src, size);
1506         return 0;
1507 }
1508
/* Stash the sub-policy's selector on the bundle (xdst->partner) so a
 * later bundle check can revalidate against it.
 * Compiles to a 0-returning no-op without CONFIG_XFRM_SUB_POLICY.
 */
static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}
1520
/* Stash the originating flow on the bundle (xdst->origin) so a later
 * bundle check can revalidate against it.
 * Compiles to a 0-returning no-op without CONFIG_XFRM_SUB_POLICY.
 */
static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}
1531
1532 static int stale_bundle(struct dst_entry *dst);
1533
1534 /* Main function: finds/creates a bundle for given flow.
1535  *
1536  * At the moment we eat a raw IP route. Mostly to speed up lookups
1537  * on interfaces with disabled IPsec.
1538  */
1539 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1540                   struct sock *sk, int flags)
1541 {
1542         struct xfrm_policy *policy;
1543         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1544         int npols;
1545         int pol_dead;
1546         int xfrm_nr;
1547         int pi;
1548         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1549         struct dst_entry *dst, *dst_orig = *dst_p;
1550         int nx = 0;
1551         int err;
1552         u32 genid;
1553         u16 family;
1554         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1555
1556 restart:
1557         genid = atomic_read(&flow_cache_genid);
1558         policy = NULL;
1559         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1560                 pols[pi] = NULL;
1561         npols = 0;
1562         pol_dead = 0;
1563         xfrm_nr = 0;
1564
1565         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1566                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1567                 err = PTR_ERR(policy);
1568                 if (IS_ERR(policy)) {
1569                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1570                         goto dropdst;
1571                 }
1572         }
1573
1574         if (!policy) {
1575                 /* To accelerate a bit...  */
1576                 if ((dst_orig->flags & DST_NOXFRM) ||
1577                     !xfrm_policy_count[XFRM_POLICY_OUT])
1578                         goto nopol;
1579
1580                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1581                                            dir, xfrm_policy_lookup);
1582                 err = PTR_ERR(policy);
1583                 if (IS_ERR(policy)) {
1584                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1585                         goto dropdst;
1586                 }
1587         }
1588
1589         if (!policy)
1590                 goto nopol;
1591
1592         family = dst_orig->ops->family;
1593         pols[0] = policy;
1594         npols ++;
1595         xfrm_nr += pols[0]->xfrm_nr;
1596
1597         err = -ENOENT;
1598         if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1599                 goto error;
1600
1601         policy->curlft.use_time = get_seconds();
1602
1603         switch (policy->action) {
1604         default:
1605         case XFRM_POLICY_BLOCK:
1606                 /* Prohibit the flow */
1607                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1608                 err = -EPERM;
1609                 goto error;
1610
1611         case XFRM_POLICY_ALLOW:
1612 #ifndef CONFIG_XFRM_SUB_POLICY
1613                 if (policy->xfrm_nr == 0) {
1614                         /* Flow passes not transformed. */
1615                         xfrm_pol_put(policy);
1616                         return 0;
1617                 }
1618 #endif
1619
1620                 /* Try to find matching bundle.
1621                  *
1622                  * LATER: help from flow cache. It is optional, this
1623                  * is required only for output policy.
1624                  */
1625                 dst = xfrm_find_bundle(fl, policy, family);
1626                 if (IS_ERR(dst)) {
1627                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1628                         err = PTR_ERR(dst);
1629                         goto error;
1630                 }
1631
1632                 if (dst)
1633                         break;
1634
1635 #ifdef CONFIG_XFRM_SUB_POLICY
1636                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1637                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1638                                                             fl, family,
1639                                                             XFRM_POLICY_OUT);
1640                         if (pols[1]) {
1641                                 if (IS_ERR(pols[1])) {
1642                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1643                                         err = PTR_ERR(pols[1]);
1644                                         goto error;
1645                                 }
1646                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1647                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1648                                         err = -EPERM;
1649                                         goto error;
1650                                 }
1651                                 npols ++;
1652                                 xfrm_nr += pols[1]->xfrm_nr;
1653                         }
1654                 }
1655
1656                 /*
1657                  * Because neither flowi nor bundle information knows about
1658                  * transformation template size. On more than one policy usage
1659                  * we can realize whether all of them is bypass or not after
1660                  * they are searched. See above not-transformed bypass
1661                  * is surrounded by non-sub policy configuration, too.
1662                  */
1663                 if (xfrm_nr == 0) {
1664                         /* Flow passes not transformed. */
1665                         xfrm_pols_put(pols, npols);
1666                         return 0;
1667                 }
1668
1669 #endif
1670                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1671
1672                 if (unlikely(nx<0)) {
1673                         err = nx;
1674                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1675                                 /* EREMOTE tells the caller to generate
1676                                  * a one-shot blackhole route.
1677                                  */
1678                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1679                                 xfrm_pol_put(policy);
1680                                 return -EREMOTE;
1681                         }
1682                         if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1683                                 DECLARE_WAITQUEUE(wait, current);
1684
1685                                 add_wait_queue(&init_net.xfrm.km_waitq, &wait);
1686                                 set_current_state(TASK_INTERRUPTIBLE);
1687                                 schedule();
1688                                 set_current_state(TASK_RUNNING);
1689                                 remove_wait_queue(&init_net.xfrm.km_waitq, &wait);
1690
1691                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1692
1693                                 if (nx == -EAGAIN && signal_pending(current)) {
1694                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1695                                         err = -ERESTART;
1696                                         goto error;
1697                                 }
1698                                 if (nx == -EAGAIN ||
1699                                     genid != atomic_read(&flow_cache_genid)) {
1700                                         xfrm_pols_put(pols, npols);
1701                                         goto restart;
1702                                 }
1703                                 err = nx;
1704                         }
1705                         if (err < 0) {
1706                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1707                                 goto error;
1708                         }
1709                 }
1710                 if (nx == 0) {
1711                         /* Flow passes not transformed. */
1712                         xfrm_pols_put(pols, npols);
1713                         return 0;
1714                 }
1715
1716                 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1717                 err = PTR_ERR(dst);
1718                 if (IS_ERR(dst)) {
1719                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1720                         goto error;
1721                 }
1722
1723                 for (pi = 0; pi < npols; pi++) {
1724                         read_lock_bh(&pols[pi]->lock);
1725                         pol_dead |= pols[pi]->walk.dead;
1726                         read_unlock_bh(&pols[pi]->lock);
1727                 }
1728
1729                 write_lock_bh(&policy->lock);
1730                 if (unlikely(pol_dead || stale_bundle(dst))) {
1731                         /* Wow! While we worked on resolving, this
1732                          * policy has gone. Retry. It is not paranoia,
1733                          * we just cannot enlist new bundle to dead object.
1734                          * We can't enlist stable bundles either.
1735                          */
1736                         write_unlock_bh(&policy->lock);
1737                         dst_free(dst);
1738
1739                         if (pol_dead)
1740                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1741                         else
1742                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1743                         err = -EHOSTUNREACH;
1744                         goto error;
1745                 }
1746
1747                 if (npols > 1)
1748                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1749                 else
1750                         err = xfrm_dst_update_origin(dst, fl);
1751                 if (unlikely(err)) {
1752                         write_unlock_bh(&policy->lock);
1753                         dst_free(dst);
1754                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1755                         goto error;
1756                 }
1757
1758                 dst->next = policy->bundles;
1759                 policy->bundles = dst;
1760                 dst_hold(dst);
1761                 write_unlock_bh(&policy->lock);
1762         }
1763         *dst_p = dst;
1764         dst_release(dst_orig);
1765         xfrm_pols_put(pols, npols);
1766         return 0;
1767
1768 error:
1769         xfrm_pols_put(pols, npols);
1770 dropdst:
1771         dst_release(dst_orig);
1772         *dst_p = NULL;
1773         return err;
1774
1775 nopol:
1776         err = -ENOENT;
1777         if (flags & XFRM_LOOKUP_ICMP)
1778                 goto dropdst;
1779         return 0;
1780 }
1781 EXPORT_SYMBOL(__xfrm_lookup);
1782
1783 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1784                 struct sock *sk, int flags)
1785 {
1786         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1787
1788         if (err == -EREMOTE) {
1789                 dst_release(*dst_p);
1790                 *dst_p = NULL;
1791                 err = -EAGAIN;
1792         }
1793
1794         return err;
1795 }
1796 EXPORT_SYMBOL(xfrm_lookup);
1797
1798 static inline int
1799 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1800 {
1801         struct xfrm_state *x;
1802
1803         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1804                 return 0;
1805         x = skb->sp->xvec[idx];
1806         if (!x->type->reject)
1807                 return 0;
1808         return x->type->reject(x, skb, fl);
1809 }
1810
1811 /* When skb is transformed back to its "native" form, we have to
1812  * check policy restrictions. At the moment we make this in maximally
1813  * stupid way. Shame on me. :-) Of course, connected sockets must
1814  * have policy cached at them.
1815  */
1816
1817 static inline int
1818 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1819               unsigned short family)
1820 {
1821         if (xfrm_state_kern(x))
1822                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1823         return  x->id.proto == tmpl->id.proto &&
1824                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1825                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1826                 x->props.mode == tmpl->mode &&
1827                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
1828                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1829                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1830                   xfrm_state_addr_cmp(tmpl, x, family));
1831 }
1832
/*
 * 0 or more than 0 is returned when validation succeeds (either bypass
 * because of optional transport mode, or the next index of the matched
 * secpath state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	/* An optional transport-mode template may be bypassed entirely:
	 * validation succeeds without consuming a secpath entry. */
	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;	/* mandatory: "no match" unless found below */
	for (; idx < sp->len; idx++) {
		/* Matched: report index of the next unconsumed state. */
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			/* A non-transport state acts as a barrier; for a
			 * mandatory template record the erroring index
			 * (encoded as -2 - idx) and stop scanning. */
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
1862
1863 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1864                           unsigned int family, int reverse)
1865 {
1866         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1867         int err;
1868
1869         if (unlikely(afinfo == NULL))
1870                 return -EAFNOSUPPORT;
1871
1872         afinfo->decode_session(skb, fl, reverse);
1873         err = security_xfrm_decode_session(skb, &fl->secid);
1874         xfrm_policy_put_afinfo(afinfo);
1875         return err;
1876 }
1877 EXPORT_SYMBOL(__xfrm_decode_session);
1878
1879 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1880 {
1881         for (; k < sp->len; k++) {
1882                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1883                         *idxp = k;
1884                         return 1;
1885                 }
1886         }
1887
1888         return 0;
1889 }
1890
/* Validate an inbound (or forwarded) packet against the applicable
 * policies.  Returns 1 when the packet may pass (no policy applies, or
 * the matched policies ALLOW it and the packet's secpath satisfies every
 * template), 0 otherwise.  @dir packs the policy direction in its low
 * bits with an optional "decode reversed" flag above XFRM_POLICY_MASK.
 */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	/* Split the packed direction argument. */
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	/* Per-socket policy takes precedence over the flow-cache lookup. */
	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		/* No policy: a secpath containing a non-transport state
		 * means unsolicited IPsec, which must be rejected. */
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
	/* A sub-type policy is evaluated together with the main-type
	 * policy for the same flow, when one exists. */
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols ++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;	/* zeroed stand-in: "no transforms" */
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* Collect the templates of all matched policies into tpp. */
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		/* Leftover non-transport states were not demanded by any
		 * template: reject. */
		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
2040
2041 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2042 {
2043         struct flowi fl;
2044
2045         if (xfrm_decode_session(skb, &fl, family) < 0) {
2046                 /* XXX: we should have something like FWDHDRERROR here. */
2047                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
2048                 return 0;
2049         }
2050
2051         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
2052 }
2053 EXPORT_SYMBOL(__xfrm_route_forward);
2054
2055 /* Optimize later using cookies and generation ids. */
2056
2057 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2058 {
2059         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2060          * to "-1" to force all XFRM destinations to get validated by
2061          * dst_ops->check on every use.  We do this because when a
2062          * normal route referenced by an XFRM dst is obsoleted we do
2063          * not go looking around for all parent referencing XFRM dsts
2064          * so that we can invalidate them.  It is just too much work.
2065          * Instead we make the checks here on every use.  For example:
2066          *
2067          *      XFRM dst A --> IPv4 dst X
2068          *
2069          * X is the "xdst->route" of A (X is also the "dst->path" of A
2070          * in this example).  If X is marked obsolete, "A" will not
2071          * notice.  That's what we are validating here via the
2072          * stale_bundle() check.
2073          *
2074          * When a policy's bundle is pruned, we dst_free() the XFRM
2075          * dst which causes it's ->obsolete field to be set to a
2076          * positive non-zero integer.  If an XFRM dst has been pruned
2077          * like this, we want to force a new route lookup.
2078          */
2079         if (dst->obsolete < 0 && !stale_bundle(dst))
2080                 return dst;
2081
2082         return NULL;
2083 }
2084
2085 static int stale_bundle(struct dst_entry *dst)
2086 {
2087         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2088 }
2089
2090 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2091 {
2092         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2093                 dst->dev = dev_net(dev)->loopback_dev;
2094                 dev_hold(dst->dev);
2095                 dev_put(dev);
2096         }
2097 }
2098 EXPORT_SYMBOL(xfrm_dst_ifdown);
2099
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
}
2105
2106 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2107 {
2108         if (dst) {
2109                 if (dst->obsolete) {
2110                         dst_release(dst);
2111                         dst = NULL;
2112                 }
2113         }
2114         return dst;
2115 }
2116
/* Move every bundle of @pol for which @func returns true off the
 * policy's bundle list and onto @gc_list_p, for the caller to free
 * outside the policy lock. */
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
	struct dst_entry *dst, **dstp;

	write_lock(&pol->lock);
	/* Pointer-to-pointer walk allows in-place unlinking. */
	dstp = &pol->bundles;
	while ((dst=*dstp) != NULL) {
		if (func(dst)) {
			/* Unlink and push onto the garbage list. */
			*dstp = dst->next;
			dst->next = *gc_list_p;
			*gc_list_p = dst;
		} else {
			dstp = &dst->next;
		}
	}
	write_unlock(&pol->lock);
}
2134
/* Apply prune_one_bundle() with predicate @func to every policy in the
 * system - the per-direction inexact lists and every bydst hash chain -
 * then free the collected bundles after all locks are dropped. */
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	struct dst_entry *gc_list = NULL;
	int dir;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		struct hlist_head *table;
		int i;

		/* Policies whose selectors cannot be hashed live on the
		 * per-direction inexact list. */
		hlist_for_each_entry(pol, entry,
				     &init_net.xfrm.policy_inexact[dir], bydst)
			prune_one_bundle(pol, func, &gc_list);

		table = init_net.xfrm.policy_bydst[dir].table;
		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst)
				prune_one_bundle(pol, func, &gc_list);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	/* Free the pruned bundles outside the policy locks. */
	while (gc_list) {
		struct dst_entry *dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}
2165
2166 static int unused_bundle(struct dst_entry *dst)
2167 {
2168         return !atomic_read(&dst->__refcnt);
2169 }
2170
/* Reclaim bundles that no longer have any users. */
static void __xfrm_garbage_collect(void)
{
	xfrm_prune_bundles(unused_bundle);
}
2175
/* Drop every bundle that has gone stale (e.g. after a device went down). */
static int xfrm_flush_bundles(void)
{
	xfrm_prune_bundles(stale_bundle);
	return 0;
}
2181
2182 static void xfrm_init_pmtu(struct dst_entry *dst)
2183 {
2184         do {
2185                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2186                 u32 pmtu, route_mtu_cached;
2187
2188                 pmtu = dst_mtu(dst->child);
2189                 xdst->child_mtu_cached = pmtu;
2190
2191                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2192
2193                 route_mtu_cached = dst_mtu(xdst->route);
2194                 xdst->route_mtu_cached = route_mtu_cached;
2195
2196                 if (pmtu > route_mtu_cached)
2197                         pmtu = route_mtu_cached;
2198
2199                 dst->metrics[RTAX_MTU-1] = pmtu;
2200         } while ((dst = dst->next));
2201 }
2202
2203 /* Check that the bundle accepts the flow and its components are
2204  * still valid.
2205  */
2206
int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
		struct flowi *fl, int family, int strict)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	/* The underlying path route must still be valid and its device
	 * (if any) running. */
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (fl) {
		/* The flow must match the bundle's recorded origin and the
		 * partner policy's selector, when those were stored. */
		if (first->origin && !flow_cache_uli_match(first->origin, fl))
			return 0;
		if (first->partner &&
		    !xfrm_selector_match(first->partner, fl, family))
			return 0;
	}
#endif

	/* Deepest level whose cached MTU changed; NULL means no change. */
	last = NULL;

	/* Validate every XFRM level of the bundle top-down. */
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
			return 0;
		if (fl && pol &&
		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
			return 0;
		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		/* State was replaced since this bundle was built. */
		if (xdst->genid != dst->xfrm->genid)
			return 0;

		if (strict && fl &&
		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
			return 0;

		/* Remember MTU changes so they can be propagated upward
		 * once the whole bundle has been validated. */
		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	/* Re-derive the effective PMTU from the deepest changed level
	 * back up to the head of the bundle. */
	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst->metrics[RTAX_MTU-1] = mtu;

		if (last == first)
			break;

		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}

EXPORT_SYMBOL(xfrm_bundle_ok);
2287
2288 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2289 {
2290         int err = 0;
2291         if (unlikely(afinfo == NULL))
2292                 return -EINVAL;
2293         if (unlikely(afinfo->family >= NPROTO))
2294                 return -EAFNOSUPPORT;
2295         write_lock_bh(&xfrm_policy_afinfo_lock);
2296         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2297                 err = -ENOBUFS;
2298         else {
2299                 struct dst_ops *dst_ops = afinfo->dst_ops;
2300                 if (likely(dst_ops->kmem_cachep == NULL))
2301                         dst_ops->kmem_cachep = xfrm_dst_cache;
2302                 if (likely(dst_ops->check == NULL))
2303                         dst_ops->check = xfrm_dst_check;
2304                 if (likely(dst_ops->negative_advice == NULL))
2305                         dst_ops->negative_advice = xfrm_negative_advice;
2306                 if (likely(dst_ops->link_failure == NULL))
2307                         dst_ops->link_failure = xfrm_link_failure;
2308                 if (likely(afinfo->garbage_collect == NULL))
2309                         afinfo->garbage_collect = __xfrm_garbage_collect;
2310                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2311         }
2312         write_unlock_bh(&xfrm_policy_afinfo_lock);
2313         return err;
2314 }
2315 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2316
2317 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2318 {
2319         int err = 0;
2320         if (unlikely(afinfo == NULL))
2321                 return -EINVAL;
2322         if (unlikely(afinfo->family >= NPROTO))
2323                 return -EAFNOSUPPORT;
2324         write_lock_bh(&xfrm_policy_afinfo_lock);
2325         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2326                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2327                         err = -EINVAL;
2328                 else {
2329                         struct dst_ops *dst_ops = afinfo->dst_ops;
2330                         xfrm_policy_afinfo[afinfo->family] = NULL;
2331                         dst_ops->kmem_cachep = NULL;
2332                         dst_ops->check = NULL;
2333                         dst_ops->negative_advice = NULL;
2334                         dst_ops->link_failure = NULL;
2335                         afinfo->garbage_collect = NULL;
2336                 }
2337         }
2338         write_unlock_bh(&xfrm_policy_afinfo_lock);
2339         return err;
2340 }
2341 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2342
/* Look up the per-family policy ops.  On success the afinfo read lock
 * is LEFT HELD; the caller must release it with xfrm_policy_put_afinfo().
 * Returns NULL (with the lock released) for an out-of-range family or
 * an unregistered one. */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_policy_afinfo_lock);
	afinfo = xfrm_policy_afinfo[family];
	if (unlikely(!afinfo))
		/* Nothing registered: drop the lock before returning NULL. */
		read_unlock(&xfrm_policy_afinfo_lock);
	return afinfo;
}
2354
/* Release the read lock taken by a successful xfrm_policy_get_afinfo().
 * The argument is unused; it exists to pair with the get call. */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	read_unlock(&xfrm_policy_afinfo_lock);
}
2359
2360 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2361 {
2362         struct net_device *dev = ptr;
2363
2364         if (!net_eq(dev_net(dev), &init_net))
2365                 return NOTIFY_DONE;
2366
2367         switch (event) {
2368         case NETDEV_DOWN:
2369                 xfrm_flush_bundles();
2370         }
2371         return NOTIFY_DONE;
2372 }
2373
/* Flushes stale bundles when a network device goes down. */
static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};
2377
#ifdef CONFIG_XFRM_STATISTICS
/* Allocate the per-cpu XFRM SNMP counter array. */
static int __init xfrm_statistics_init(void)
{
	int rc = snmp_mib_init((void **)xfrm_statistics,
			       sizeof(struct linux_xfrm_mib));

	return rc < 0 ? -ENOMEM : 0;
}
#endif
2387
/* Per-namespace policy setup: the byidx hash, the per-direction bydst
 * hash tables and inexact lists, and - for init_net only - the global
 * xfrm_dst kmem cache and the netdev notifier.
 * Returns 0 on success, -ENOMEM on allocation failure. */
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	/* The dst cache is global; create it once, with the first netns. */
	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	hmask = 8 - 1;	/* initial 8-bucket tables */
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	/* Unwind the bydst tables allocated so far (all are size sz). */
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}
2435
2436 static void xfrm_policy_fini(struct net *net)
2437 {
2438         unsigned int sz;
2439         int dir;
2440
2441         WARN_ON(!list_empty(&net->xfrm.policy_all));
2442
2443         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2444                 struct xfrm_policy_hash *htab;
2445
2446                 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2447
2448                 htab = &net->xfrm.policy_bydst[dir];
2449                 sz = (htab->hmask + 1);
2450                 WARN_ON(!hlist_empty(htab->table));
2451                 xfrm_hash_free(htab->table, sz);
2452         }
2453
2454         sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2455         WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
2456         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2457 }
2458
2459 static int __net_init xfrm_net_init(struct net *net)
2460 {
2461         int rv;
2462
2463         rv = xfrm_state_init(net);
2464         if (rv < 0)
2465                 goto out_state;
2466         rv = xfrm_policy_init(net);
2467         if (rv < 0)
2468                 goto out_policy;
2469         return 0;
2470
2471 out_policy:
2472         xfrm_state_fini(net);
2473 out_state:
2474         return rv;
2475 }
2476
/* pernet exit: tear down in reverse order of xfrm_net_init(). */
static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
}
2482
/* Per-network-namespace init/exit hooks for the xfrm subsystem. */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};
2487
/* Boot-time initialization of the xfrm subsystem: register the pernet
 * hooks, then set up statistics, input processing and the proc files. */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_statistics_init();
#endif
	xfrm_input_init();
#ifdef CONFIG_XFRM_STATISTICS
	xfrm_proc_init();
#endif
}
2499
2500 #ifdef CONFIG_AUDITSYSCALL
/* Append a policy's security context (if any) and its selector's
 * source/destination addresses to an audit record.  Prefix lengths are
 * logged only when they narrow the full host address. */
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch(sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}
2534
2535 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2536                            uid_t auid, u32 sessionid, u32 secid)
2537 {
2538         struct audit_buffer *audit_buf;
2539
2540         audit_buf = xfrm_audit_start("SPD-add");
2541         if (audit_buf == NULL)
2542                 return;
2543         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2544         audit_log_format(audit_buf, " res=%u", result);
2545         xfrm_audit_common_policyinfo(xp, audit_buf);
2546         audit_log_end(audit_buf);
2547 }
2548 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2549
2550 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2551                               uid_t auid, u32 sessionid, u32 secid)
2552 {
2553         struct audit_buffer *audit_buf;
2554
2555         audit_buf = xfrm_audit_start("SPD-delete");
2556         if (audit_buf == NULL)
2557                 return;
2558         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2559         audit_log_format(audit_buf, " res=%u", result);
2560         xfrm_audit_common_policyinfo(xp, audit_buf);
2561         audit_log_end(audit_buf);
2562 }
2563 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2564 #endif
2565
2566 #ifdef CONFIG_XFRM_MIGRATE
2567 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2568                                        struct xfrm_selector *sel_tgt)
2569 {
2570         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2571                 if (sel_tgt->family == sel_cmp->family &&
2572                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2573                                   sel_cmp->family) == 0 &&
2574                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2575                                   sel_cmp->family) == 0 &&
2576                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2577                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2578                         return 1;
2579                 }
2580         } else {
2581                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2582                         return 1;
2583                 }
2584         }
2585         return 0;
2586 }
2587
/*
 * Find the policy targeted by a MIGRATE request for (@sel, @dir, @type).
 * Searches the exact bydst hash chain first, then the inexact list; an
 * inexact policy is only preferred when its priority is strictly better
 * (numerically lower) than the exact match's.  On success the returned
 * policy carries an extra reference which the caller must drop with
 * xfrm_pol_put(); returns NULL when nothing matches.
 */
static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
						     u8 dir, u8 type)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;	/* worst possible priority as sentinel */

	read_lock_bh(&xfrm_policy_lock);
	/* Pass 1: exact-selector hash chain for this direction. */
	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	/* Pass 2: inexact policies; only override a hash-chain hit when
	 * strictly higher precedence (pol->priority < priority). */
	chain = &init_net.xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);	/* reference handed to the caller */

	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}
2623
2624 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2625 {
2626         int match = 0;
2627
2628         if (t->mode == m->mode && t->id.proto == m->proto &&
2629             (m->reqid == 0 || t->reqid == m->reqid)) {
2630                 switch (t->mode) {
2631                 case XFRM_MODE_TUNNEL:
2632                 case XFRM_MODE_BEET:
2633                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2634                                           m->old_family) == 0 &&
2635                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2636                                           m->old_family) == 0) {
2637                                 match = 1;
2638                         }
2639                         break;
2640                 case XFRM_MODE_TRANSPORT:
2641                         /* in case of transport mode, template does not store
2642                            any IP addresses, hence we just compare mode and
2643                            protocol */
2644                         match = 1;
2645                         break;
2646                 default:
2647                         break;
2648                 }
2649         }
2650         return match;
2651 }
2652
/*
 * Update the endpoint address(es) of the template(s) in @pol according
 * to the @num_migrate entries in @m.  Runs under pol->lock so the
 * templates and bundle list cannot change underneath us.
 *
 * Returns 0 on success, -ENOENT if the policy was deleted concurrently,
 * or -ENODATA when no template matched any migrate entry.
 */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	struct dst_entry *dst;
	int i, j, n = 0;	/* n counts templates that matched an entry */

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	/* Cross-match every template against every migrate entry. */
	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			/* Only tunnel/BEET templates store addresses that
			 * need rewriting; transport-mode matches are
			 * counted but left untouched. */
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles: cached routes reference the old
			 * endpoints and must be rebuilt */
			while ((dst = pol->bundles) != NULL) {
				pol->bundles = dst->next;
				dst_free(dst);
			}
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}
2697
2698 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2699 {
2700         int i, j;
2701
2702         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2703                 return -EINVAL;
2704
2705         for (i = 0; i < num_migrate; i++) {
2706                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2707                                    m[i].old_family) == 0) &&
2708                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2709                                    m[i].old_family) == 0))
2710                         return -EINVAL;
2711                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2712                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2713                         return -EINVAL;
2714
2715                 /* check if there is any duplicated entry */
2716                 for (j = i + 1; j < num_migrate; j++) {
2717                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2718                                     sizeof(m[i].old_daddr)) &&
2719                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2720                                     sizeof(m[i].old_saddr)) &&
2721                             m[i].proto == m[j].proto &&
2722                             m[i].mode == m[j].mode &&
2723                             m[i].reqid == m[j].reqid &&
2724                             m[i].old_family == m[j].old_family)
2725                                 return -EINVAL;
2726                 }
2727         }
2728
2729         return 0;
2730 }
2731
/*
 * Perform a PF_KEY/netlink MIGRATE operation: move the policy matching
 * (@sel, @dir, @type) and its associated states from their old endpoint
 * addresses to the new ones described by the @num_migrate entries in @m,
 * then announce the change to key managers (@k carries the optional
 * kmaddress).  Returns 0 on success or a negative errno; on failure the
 * old states are left in place and any newly-cloned states are deleted.
 */
int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_migrate,
		 struct xfrm_kmaddress *k)
{
	int i, err, nx_cur = 0, nx_new = 0;	/* counts of old/new states */
	struct xfrm_policy *pol = NULL;
	struct xfrm_state *x, *xc;
	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];	/* states to delete */
	struct xfrm_state *x_new[XFRM_MAX_DEPTH];	/* migrated clones */
	struct xfrm_migrate *mp;

	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
		goto out;

	/* Stage 1 - find policy (returned with a reference held) */
	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Stage 2 - find and update state(s): each matching old state is
	 * cloned onto the new addresses; a clone failure unwinds all work
	 * done so far via restore_state. */
	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
		if ((x = xfrm_migrate_state_find(mp))) {
			x_cur[nx_cur] = x;
			nx_cur++;
			if ((xc = xfrm_state_migrate(x, mp))) {
				x_new[nx_new] = xc;
				nx_new++;
			} else {
				err = -ENODATA;
				goto restore_state;
			}
		}
	}

	/* Stage 3 - update policy */
	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
		goto restore_state;

	/* Stage 4 - delete old state(s) */
	if (nx_cur) {
		xfrm_states_put(x_cur, nx_cur);
		xfrm_states_delete(x_cur, nx_cur);
	}

	/* Stage 5 - announce */
	km_migrate(sel, dir, type, m, num_migrate, k);

	xfrm_pol_put(pol);

	return 0;
out:
	return err;

restore_state:
	/* Failure path: drop our policy reference, release the references
	 * taken on the old states, and delete any new clones already made. */
	if (pol)
		xfrm_pol_put(pol);
	if (nx_cur)
		xfrm_states_put(x_cur, nx_cur);
	if (nx_new)
		xfrm_states_delete(x_new, nx_new);

	return err;
}
EXPORT_SYMBOL(xfrm_migrate);
2797 #endif