netns xfrm: fixup xfrm_alloc_spi()
[safe/jmp/linux-2.6] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <linux/audit.h>
23 #include <asm/uaccess.h>
24
25 #include "xfrm_hash.h"
26
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29
30 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32
33 u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
36 u32 sysctl_xfrm_acq_expires __read_mostly = 30;
37
/* Each xfrm_state may be linked to three hash tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,saddr,reqid,family) to find what SAs exist for
      given destination/tunnel endpoint. (output)
   3. Hash table by (daddr,saddr,family) to find SA by address pair.
 */
44
45 static DEFINE_SPINLOCK(xfrm_state_lock);
46
47 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
48 static unsigned int xfrm_state_genid;
49
50 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
51 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
52
/* Replay auditing is only built with CONFIG_AUDITSYSCALL; otherwise the
 * call compiles away to an empty statement.
 */
#ifndef CONFIG_AUDITSYSCALL
#define xfrm_audit_state_replay(x, s, sq)	do { ; } while (0)
#else
static void xfrm_audit_state_replay(struct xfrm_state *x,
				    struct sk_buff *skb, __be32 net_seq);
#endif /* !CONFIG_AUDITSYSCALL */
59
60 static inline unsigned int xfrm_dst_hash(struct net *net,
61                                          xfrm_address_t *daddr,
62                                          xfrm_address_t *saddr,
63                                          u32 reqid,
64                                          unsigned short family)
65 {
66         return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
67 }
68
69 static inline unsigned int xfrm_src_hash(struct net *net,
70                                          xfrm_address_t *daddr,
71                                          xfrm_address_t *saddr,
72                                          unsigned short family)
73 {
74         return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
75 }
76
77 static inline unsigned int
78 xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
79 {
80         return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
81 }
82
83 static void xfrm_hash_transfer(struct hlist_head *list,
84                                struct hlist_head *ndsttable,
85                                struct hlist_head *nsrctable,
86                                struct hlist_head *nspitable,
87                                unsigned int nhashmask)
88 {
89         struct hlist_node *entry, *tmp;
90         struct xfrm_state *x;
91
92         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
93                 unsigned int h;
94
95                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
96                                     x->props.reqid, x->props.family,
97                                     nhashmask);
98                 hlist_add_head(&x->bydst, ndsttable+h);
99
100                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
101                                     x->props.family,
102                                     nhashmask);
103                 hlist_add_head(&x->bysrc, nsrctable+h);
104
105                 if (x->id.spi) {
106                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
107                                             x->id.proto, x->props.family,
108                                             nhashmask);
109                         hlist_add_head(&x->byspi, nspitable+h);
110                 }
111         }
112 }
113
114 static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
115 {
116         return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
117 }
118
119 static DEFINE_MUTEX(hash_resize_mutex);
120
121 static void xfrm_hash_resize(struct work_struct *work)
122 {
123         struct net *net = container_of(work, struct net, xfrm.state_hash_work);
124         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
125         unsigned long nsize, osize;
126         unsigned int nhashmask, ohashmask;
127         int i;
128
129         mutex_lock(&hash_resize_mutex);
130
131         nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
132         ndst = xfrm_hash_alloc(nsize);
133         if (!ndst)
134                 goto out_unlock;
135         nsrc = xfrm_hash_alloc(nsize);
136         if (!nsrc) {
137                 xfrm_hash_free(ndst, nsize);
138                 goto out_unlock;
139         }
140         nspi = xfrm_hash_alloc(nsize);
141         if (!nspi) {
142                 xfrm_hash_free(ndst, nsize);
143                 xfrm_hash_free(nsrc, nsize);
144                 goto out_unlock;
145         }
146
147         spin_lock_bh(&xfrm_state_lock);
148
149         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
150         for (i = net->xfrm.state_hmask; i >= 0; i--)
151                 xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
152                                    nhashmask);
153
154         odst = net->xfrm.state_bydst;
155         osrc = net->xfrm.state_bysrc;
156         ospi = net->xfrm.state_byspi;
157         ohashmask = net->xfrm.state_hmask;
158
159         net->xfrm.state_bydst = ndst;
160         net->xfrm.state_bysrc = nsrc;
161         net->xfrm.state_byspi = nspi;
162         net->xfrm.state_hmask = nhashmask;
163
164         spin_unlock_bh(&xfrm_state_lock);
165
166         osize = (ohashmask + 1) * sizeof(struct hlist_head);
167         xfrm_hash_free(odst, osize);
168         xfrm_hash_free(osrc, osize);
169         xfrm_hash_free(ospi, osize);
170
171 out_unlock:
172         mutex_unlock(&hash_resize_mutex);
173 }
174
175 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
176 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
177
178 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
179
180 int __xfrm_state_delete(struct xfrm_state *x);
181
182 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
183 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
184
185 static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
186 {
187         struct xfrm_state_afinfo *afinfo;
188         if (unlikely(family >= NPROTO))
189                 return NULL;
190         write_lock_bh(&xfrm_state_afinfo_lock);
191         afinfo = xfrm_state_afinfo[family];
192         if (unlikely(!afinfo))
193                 write_unlock_bh(&xfrm_state_afinfo_lock);
194         return afinfo;
195 }
196
197 static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
198         __releases(xfrm_state_afinfo_lock)
199 {
200         write_unlock_bh(&xfrm_state_afinfo_lock);
201 }
202
203 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
204 {
205         struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
206         const struct xfrm_type **typemap;
207         int err = 0;
208
209         if (unlikely(afinfo == NULL))
210                 return -EAFNOSUPPORT;
211         typemap = afinfo->type_map;
212
213         if (likely(typemap[type->proto] == NULL))
214                 typemap[type->proto] = type;
215         else
216                 err = -EEXIST;
217         xfrm_state_unlock_afinfo(afinfo);
218         return err;
219 }
220 EXPORT_SYMBOL(xfrm_register_type);
221
222 int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
223 {
224         struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family);
225         const struct xfrm_type **typemap;
226         int err = 0;
227
228         if (unlikely(afinfo == NULL))
229                 return -EAFNOSUPPORT;
230         typemap = afinfo->type_map;
231
232         if (unlikely(typemap[type->proto] != type))
233                 err = -ENOENT;
234         else
235                 typemap[type->proto] = NULL;
236         xfrm_state_unlock_afinfo(afinfo);
237         return err;
238 }
239 EXPORT_SYMBOL(xfrm_unregister_type);
240
241 static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
242 {
243         struct xfrm_state_afinfo *afinfo;
244         const struct xfrm_type **typemap;
245         const struct xfrm_type *type;
246         int modload_attempted = 0;
247
248 retry:
249         afinfo = xfrm_state_get_afinfo(family);
250         if (unlikely(afinfo == NULL))
251                 return NULL;
252         typemap = afinfo->type_map;
253
254         type = typemap[proto];
255         if (unlikely(type && !try_module_get(type->owner)))
256                 type = NULL;
257         if (!type && !modload_attempted) {
258                 xfrm_state_put_afinfo(afinfo);
259                 request_module("xfrm-type-%d-%d", family, proto);
260                 modload_attempted = 1;
261                 goto retry;
262         }
263
264         xfrm_state_put_afinfo(afinfo);
265         return type;
266 }
267
268 static void xfrm_put_type(const struct xfrm_type *type)
269 {
270         module_put(type->owner);
271 }
272
273 int xfrm_register_mode(struct xfrm_mode *mode, int family)
274 {
275         struct xfrm_state_afinfo *afinfo;
276         struct xfrm_mode **modemap;
277         int err;
278
279         if (unlikely(mode->encap >= XFRM_MODE_MAX))
280                 return -EINVAL;
281
282         afinfo = xfrm_state_lock_afinfo(family);
283         if (unlikely(afinfo == NULL))
284                 return -EAFNOSUPPORT;
285
286         err = -EEXIST;
287         modemap = afinfo->mode_map;
288         if (modemap[mode->encap])
289                 goto out;
290
291         err = -ENOENT;
292         if (!try_module_get(afinfo->owner))
293                 goto out;
294
295         mode->afinfo = afinfo;
296         modemap[mode->encap] = mode;
297         err = 0;
298
299 out:
300         xfrm_state_unlock_afinfo(afinfo);
301         return err;
302 }
303 EXPORT_SYMBOL(xfrm_register_mode);
304
305 int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
306 {
307         struct xfrm_state_afinfo *afinfo;
308         struct xfrm_mode **modemap;
309         int err;
310
311         if (unlikely(mode->encap >= XFRM_MODE_MAX))
312                 return -EINVAL;
313
314         afinfo = xfrm_state_lock_afinfo(family);
315         if (unlikely(afinfo == NULL))
316                 return -EAFNOSUPPORT;
317
318         err = -ENOENT;
319         modemap = afinfo->mode_map;
320         if (likely(modemap[mode->encap] == mode)) {
321                 modemap[mode->encap] = NULL;
322                 module_put(mode->afinfo->owner);
323                 err = 0;
324         }
325
326         xfrm_state_unlock_afinfo(afinfo);
327         return err;
328 }
329 EXPORT_SYMBOL(xfrm_unregister_mode);
330
331 static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
332 {
333         struct xfrm_state_afinfo *afinfo;
334         struct xfrm_mode *mode;
335         int modload_attempted = 0;
336
337         if (unlikely(encap >= XFRM_MODE_MAX))
338                 return NULL;
339
340 retry:
341         afinfo = xfrm_state_get_afinfo(family);
342         if (unlikely(afinfo == NULL))
343                 return NULL;
344
345         mode = afinfo->mode_map[encap];
346         if (unlikely(mode && !try_module_get(mode->owner)))
347                 mode = NULL;
348         if (!mode && !modload_attempted) {
349                 xfrm_state_put_afinfo(afinfo);
350                 request_module("xfrm-mode-%d-%d", family, encap);
351                 modload_attempted = 1;
352                 goto retry;
353         }
354
355         xfrm_state_put_afinfo(afinfo);
356         return mode;
357 }
358
359 static void xfrm_put_mode(struct xfrm_mode *mode)
360 {
361         module_put(mode->owner);
362 }
363
364 static void xfrm_state_gc_destroy(struct xfrm_state *x)
365 {
366         del_timer_sync(&x->timer);
367         del_timer_sync(&x->rtimer);
368         kfree(x->aalg);
369         kfree(x->ealg);
370         kfree(x->calg);
371         kfree(x->encap);
372         kfree(x->coaddr);
373         if (x->inner_mode)
374                 xfrm_put_mode(x->inner_mode);
375         if (x->inner_mode_iaf)
376                 xfrm_put_mode(x->inner_mode_iaf);
377         if (x->outer_mode)
378                 xfrm_put_mode(x->outer_mode);
379         if (x->type) {
380                 x->type->destructor(x);
381                 xfrm_put_type(x->type);
382         }
383         security_xfrm_state_free(x);
384         kfree(x);
385 }
386
387 static void xfrm_state_gc_task(struct work_struct *work)
388 {
389         struct net *net = container_of(work, struct net, xfrm.state_gc_work);
390         struct xfrm_state *x;
391         struct hlist_node *entry, *tmp;
392         struct hlist_head gc_list;
393
394         spin_lock_bh(&xfrm_state_gc_lock);
395         hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
396         spin_unlock_bh(&xfrm_state_gc_lock);
397
398         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
399                 xfrm_state_gc_destroy(x);
400
401         wake_up(&net->xfrm.km_waitq);
402 }
403
404 static inline unsigned long make_jiffies(long secs)
405 {
406         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
407                 return MAX_SCHEDULE_TIMEOUT-1;
408         else
409                 return secs*HZ;
410 }
411
412 static void xfrm_timer_handler(unsigned long data)
413 {
414         struct xfrm_state *x = (struct xfrm_state*)data;
415         struct net *net = xs_net(x);
416         unsigned long now = get_seconds();
417         long next = LONG_MAX;
418         int warn = 0;
419         int err = 0;
420
421         spin_lock(&x->lock);
422         if (x->km.state == XFRM_STATE_DEAD)
423                 goto out;
424         if (x->km.state == XFRM_STATE_EXPIRED)
425                 goto expired;
426         if (x->lft.hard_add_expires_seconds) {
427                 long tmo = x->lft.hard_add_expires_seconds +
428                         x->curlft.add_time - now;
429                 if (tmo <= 0)
430                         goto expired;
431                 if (tmo < next)
432                         next = tmo;
433         }
434         if (x->lft.hard_use_expires_seconds) {
435                 long tmo = x->lft.hard_use_expires_seconds +
436                         (x->curlft.use_time ? : now) - now;
437                 if (tmo <= 0)
438                         goto expired;
439                 if (tmo < next)
440                         next = tmo;
441         }
442         if (x->km.dying)
443                 goto resched;
444         if (x->lft.soft_add_expires_seconds) {
445                 long tmo = x->lft.soft_add_expires_seconds +
446                         x->curlft.add_time - now;
447                 if (tmo <= 0)
448                         warn = 1;
449                 else if (tmo < next)
450                         next = tmo;
451         }
452         if (x->lft.soft_use_expires_seconds) {
453                 long tmo = x->lft.soft_use_expires_seconds +
454                         (x->curlft.use_time ? : now) - now;
455                 if (tmo <= 0)
456                         warn = 1;
457                 else if (tmo < next)
458                         next = tmo;
459         }
460
461         x->km.dying = warn;
462         if (warn)
463                 km_state_expired(x, 0, 0);
464 resched:
465         if (next != LONG_MAX)
466                 mod_timer(&x->timer, jiffies + make_jiffies(next));
467
468         goto out;
469
470 expired:
471         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
472                 x->km.state = XFRM_STATE_EXPIRED;
473                 wake_up(&net->xfrm.km_waitq);
474                 next = 2;
475                 goto resched;
476         }
477
478         err = __xfrm_state_delete(x);
479         if (!err && x->id.spi)
480                 km_state_expired(x, 1, 0);
481
482         xfrm_audit_state_delete(x, err ? 0 : 1,
483                                 audit_get_loginuid(current),
484                                 audit_get_sessionid(current), 0);
485
486 out:
487         spin_unlock(&x->lock);
488 }
489
490 static void xfrm_replay_timer_handler(unsigned long data);
491
492 struct xfrm_state *xfrm_state_alloc(struct net *net)
493 {
494         struct xfrm_state *x;
495
496         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
497
498         if (x) {
499                 write_pnet(&x->xs_net, net);
500                 atomic_set(&x->refcnt, 1);
501                 atomic_set(&x->tunnel_users, 0);
502                 INIT_LIST_HEAD(&x->km.all);
503                 INIT_HLIST_NODE(&x->bydst);
504                 INIT_HLIST_NODE(&x->bysrc);
505                 INIT_HLIST_NODE(&x->byspi);
506                 setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
507                 setup_timer(&x->rtimer, xfrm_replay_timer_handler,
508                                 (unsigned long)x);
509                 x->curlft.add_time = get_seconds();
510                 x->lft.soft_byte_limit = XFRM_INF;
511                 x->lft.soft_packet_limit = XFRM_INF;
512                 x->lft.hard_byte_limit = XFRM_INF;
513                 x->lft.hard_packet_limit = XFRM_INF;
514                 x->replay_maxage = 0;
515                 x->replay_maxdiff = 0;
516                 x->inner_mode = NULL;
517                 x->inner_mode_iaf = NULL;
518                 spin_lock_init(&x->lock);
519         }
520         return x;
521 }
522 EXPORT_SYMBOL(xfrm_state_alloc);
523
524 void __xfrm_state_destroy(struct xfrm_state *x)
525 {
526         struct net *net = xs_net(x);
527
528         WARN_ON(x->km.state != XFRM_STATE_DEAD);
529
530         spin_lock_bh(&xfrm_state_gc_lock);
531         hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
532         spin_unlock_bh(&xfrm_state_gc_lock);
533         schedule_work(&net->xfrm.state_gc_work);
534 }
535 EXPORT_SYMBOL(__xfrm_state_destroy);
536
537 int __xfrm_state_delete(struct xfrm_state *x)
538 {
539         struct net *net = xs_net(x);
540         int err = -ESRCH;
541
542         if (x->km.state != XFRM_STATE_DEAD) {
543                 x->km.state = XFRM_STATE_DEAD;
544                 spin_lock(&xfrm_state_lock);
545                 list_del(&x->km.all);
546                 hlist_del(&x->bydst);
547                 hlist_del(&x->bysrc);
548                 if (x->id.spi)
549                         hlist_del(&x->byspi);
550                 net->xfrm.state_num--;
551                 spin_unlock(&xfrm_state_lock);
552
553                 /* All xfrm_state objects are created by xfrm_state_alloc.
554                  * The xfrm_state_alloc call gives a reference, and that
555                  * is what we are dropping here.
556                  */
557                 xfrm_state_put(x);
558                 err = 0;
559         }
560
561         return err;
562 }
563 EXPORT_SYMBOL(__xfrm_state_delete);
564
565 int xfrm_state_delete(struct xfrm_state *x)
566 {
567         int err;
568
569         spin_lock_bh(&x->lock);
570         err = __xfrm_state_delete(x);
571         spin_unlock_bh(&x->lock);
572
573         return err;
574 }
575 EXPORT_SYMBOL(xfrm_state_delete);
576
577 #ifdef CONFIG_SECURITY_NETWORK_XFRM
578 static inline int
579 xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
580 {
581         int i, err = 0;
582
583         for (i = 0; i <= net->xfrm.state_hmask; i++) {
584                 struct hlist_node *entry;
585                 struct xfrm_state *x;
586
587                 hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
588                         if (xfrm_id_proto_match(x->id.proto, proto) &&
589                            (err = security_xfrm_state_delete(x)) != 0) {
590                                 xfrm_audit_state_delete(x, 0,
591                                                         audit_info->loginuid,
592                                                         audit_info->sessionid,
593                                                         audit_info->secid);
594                                 return err;
595                         }
596                 }
597         }
598
599         return err;
600 }
601 #else
602 static inline int
603 xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
604 {
605         return 0;
606 }
607 #endif
608
609 int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
610 {
611         int i, err = 0;
612
613         spin_lock_bh(&xfrm_state_lock);
614         err = xfrm_state_flush_secctx_check(net, proto, audit_info);
615         if (err)
616                 goto out;
617
618         for (i = 0; i <= net->xfrm.state_hmask; i++) {
619                 struct hlist_node *entry;
620                 struct xfrm_state *x;
621 restart:
622                 hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
623                         if (!xfrm_state_kern(x) &&
624                             xfrm_id_proto_match(x->id.proto, proto)) {
625                                 xfrm_state_hold(x);
626                                 spin_unlock_bh(&xfrm_state_lock);
627
628                                 err = xfrm_state_delete(x);
629                                 xfrm_audit_state_delete(x, err ? 0 : 1,
630                                                         audit_info->loginuid,
631                                                         audit_info->sessionid,
632                                                         audit_info->secid);
633                                 xfrm_state_put(x);
634
635                                 spin_lock_bh(&xfrm_state_lock);
636                                 goto restart;
637                         }
638                 }
639         }
640         err = 0;
641
642 out:
643         spin_unlock_bh(&xfrm_state_lock);
644         wake_up(&net->xfrm.km_waitq);
645         return err;
646 }
647 EXPORT_SYMBOL(xfrm_state_flush);
648
649 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
650 {
651         spin_lock_bh(&xfrm_state_lock);
652         si->sadcnt = init_net.xfrm.state_num;
653         si->sadhcnt = init_net.xfrm.state_hmask;
654         si->sadhmcnt = xfrm_state_hashmax;
655         spin_unlock_bh(&xfrm_state_lock);
656 }
657 EXPORT_SYMBOL(xfrm_sad_getinfo);
658
659 static int
660 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
661                   struct xfrm_tmpl *tmpl,
662                   xfrm_address_t *daddr, xfrm_address_t *saddr,
663                   unsigned short family)
664 {
665         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
666         if (!afinfo)
667                 return -1;
668         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
669         xfrm_state_put_afinfo(afinfo);
670         return 0;
671 }
672
673 static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
674 {
675         unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
676         struct xfrm_state *x;
677         struct hlist_node *entry;
678
679         hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) {
680                 if (x->props.family != family ||
681                     x->id.spi       != spi ||
682                     x->id.proto     != proto)
683                         continue;
684
685                 switch (family) {
686                 case AF_INET:
687                         if (x->id.daddr.a4 != daddr->a4)
688                                 continue;
689                         break;
690                 case AF_INET6:
691                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
692                                              (struct in6_addr *)
693                                              x->id.daddr.a6))
694                                 continue;
695                         break;
696                 }
697
698                 xfrm_state_hold(x);
699                 return x;
700         }
701
702         return NULL;
703 }
704
705 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
706 {
707         unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
708         struct xfrm_state *x;
709         struct hlist_node *entry;
710
711         hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) {
712                 if (x->props.family != family ||
713                     x->id.proto     != proto)
714                         continue;
715
716                 switch (family) {
717                 case AF_INET:
718                         if (x->id.daddr.a4 != daddr->a4 ||
719                             x->props.saddr.a4 != saddr->a4)
720                                 continue;
721                         break;
722                 case AF_INET6:
723                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
724                                              (struct in6_addr *)
725                                              x->id.daddr.a6) ||
726                             !ipv6_addr_equal((struct in6_addr *)saddr,
727                                              (struct in6_addr *)
728                                              x->props.saddr.a6))
729                                 continue;
730                         break;
731                 }
732
733                 xfrm_state_hold(x);
734                 return x;
735         }
736
737         return NULL;
738 }
739
740 static inline struct xfrm_state *
741 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
742 {
743         struct net *net = xs_net(x);
744
745         if (use_spi)
746                 return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi,
747                                            x->id.proto, family);
748         else
749                 return __xfrm_state_lookup_byaddr(net, &x->id.daddr,
750                                                   &x->props.saddr,
751                                                   x->id.proto, family);
752 }
753
754 static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
755 {
756         if (have_hash_collision &&
757             (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
758             net->xfrm.state_num > net->xfrm.state_hmask)
759                 schedule_work(&net->xfrm.state_hash_work);
760 }
761
762 struct xfrm_state *
763 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
764                 struct flowi *fl, struct xfrm_tmpl *tmpl,
765                 struct xfrm_policy *pol, int *err,
766                 unsigned short family)
767 {
768         unsigned int h;
769         struct hlist_node *entry;
770         struct xfrm_state *x, *x0, *to_put;
771         int acquire_in_progress = 0;
772         int error = 0;
773         struct xfrm_state *best = NULL;
774
775         to_put = NULL;
776
777         spin_lock_bh(&xfrm_state_lock);
778         h = xfrm_dst_hash(&init_net, daddr, saddr, tmpl->reqid, family);
779         hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
780                 if (x->props.family == family &&
781                     x->props.reqid == tmpl->reqid &&
782                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
783                     xfrm_state_addr_check(x, daddr, saddr, family) &&
784                     tmpl->mode == x->props.mode &&
785                     tmpl->id.proto == x->id.proto &&
786                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
787                         /* Resolution logic:
788                            1. There is a valid state with matching selector.
789                               Done.
790                            2. Valid state with inappropriate selector. Skip.
791
792                            Entering area of "sysdeps".
793
794                            3. If state is not valid, selector is temporary,
795                               it selects only session which triggered
796                               previous resolution. Key manager will do
797                               something to install a state with proper
798                               selector.
799                          */
800                         if (x->km.state == XFRM_STATE_VALID) {
801                                 if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
802                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
803                                         continue;
804                                 if (!best ||
805                                     best->km.dying > x->km.dying ||
806                                     (best->km.dying == x->km.dying &&
807                                      best->curlft.add_time < x->curlft.add_time))
808                                         best = x;
809                         } else if (x->km.state == XFRM_STATE_ACQ) {
810                                 acquire_in_progress = 1;
811                         } else if (x->km.state == XFRM_STATE_ERROR ||
812                                    x->km.state == XFRM_STATE_EXPIRED) {
813                                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
814                                     security_xfrm_state_pol_flow_match(x, pol, fl))
815                                         error = -ESRCH;
816                         }
817                 }
818         }
819
820         x = best;
821         if (!x && !error && !acquire_in_progress) {
822                 if (tmpl->id.spi &&
823                     (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi,
824                                               tmpl->id.proto, family)) != NULL) {
825                         to_put = x0;
826                         error = -EEXIST;
827                         goto out;
828                 }
829                 x = xfrm_state_alloc(&init_net);
830                 if (x == NULL) {
831                         error = -ENOMEM;
832                         goto out;
833                 }
834                 /* Initialize temporary selector matching only
835                  * to current session. */
836                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
837
838                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
839                 if (error) {
840                         x->km.state = XFRM_STATE_DEAD;
841                         to_put = x;
842                         x = NULL;
843                         goto out;
844                 }
845
846                 if (km_query(x, tmpl, pol) == 0) {
847                         x->km.state = XFRM_STATE_ACQ;
848                         list_add(&x->km.all, &init_net.xfrm.state_all);
849                         hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
850                         h = xfrm_src_hash(&init_net, daddr, saddr, family);
851                         hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
852                         if (x->id.spi) {
853                                 h = xfrm_spi_hash(&init_net, &x->id.daddr, x->id.spi, x->id.proto, family);
854                                 hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
855                         }
856                         x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
857                         x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
858                         add_timer(&x->timer);
859                         init_net.xfrm.state_num++;
860                         xfrm_hash_grow_check(&init_net, x->bydst.next != NULL);
861                 } else {
862                         x->km.state = XFRM_STATE_DEAD;
863                         to_put = x;
864                         x = NULL;
865                         error = -ESRCH;
866                 }
867         }
868 out:
869         if (x)
870                 xfrm_state_hold(x);
871         else
872                 *err = acquire_in_progress ? -EAGAIN : error;
873         spin_unlock_bh(&xfrm_state_lock);
874         if (to_put)
875                 xfrm_state_put(to_put);
876         return x;
877 }
878
879 struct xfrm_state *
880 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
881                     unsigned short family, u8 mode, u8 proto, u32 reqid)
882 {
883         unsigned int h;
884         struct xfrm_state *rx = NULL, *x = NULL;
885         struct hlist_node *entry;
886
887         spin_lock(&xfrm_state_lock);
888         h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family);
889         hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
890                 if (x->props.family == family &&
891                     x->props.reqid == reqid &&
892                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
893                     xfrm_state_addr_check(x, daddr, saddr, family) &&
894                     mode == x->props.mode &&
895                     proto == x->id.proto &&
896                     x->km.state == XFRM_STATE_VALID) {
897                         rx = x;
898                         break;
899                 }
900         }
901
902         if (rx)
903                 xfrm_state_hold(rx);
904         spin_unlock(&xfrm_state_lock);
905
906
907         return rx;
908 }
909 EXPORT_SYMBOL(xfrm_stateonly_find);
910
911 static void __xfrm_state_insert(struct xfrm_state *x)
912 {
913         struct net *net = xs_net(x);
914         unsigned int h;
915
916         x->genid = ++xfrm_state_genid;
917
918         list_add(&x->km.all, &net->xfrm.state_all);
919
920         h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
921                           x->props.reqid, x->props.family);
922         hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
923
924         h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
925         hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
926
927         if (x->id.spi) {
928                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
929                                   x->props.family);
930
931                 hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
932         }
933
934         mod_timer(&x->timer, jiffies + HZ);
935         if (x->replay_maxage)
936                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
937
938         wake_up(&net->xfrm.km_waitq);
939
940         net->xfrm.state_num++;
941
942         xfrm_hash_grow_check(net, x->bydst.next != NULL);
943 }
944
945 /* xfrm_state_lock is held */
946 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
947 {
948         struct net *net = xs_net(xnew);
949         unsigned short family = xnew->props.family;
950         u32 reqid = xnew->props.reqid;
951         struct xfrm_state *x;
952         struct hlist_node *entry;
953         unsigned int h;
954
955         h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
956         hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
957                 if (x->props.family     == family &&
958                     x->props.reqid      == reqid &&
959                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
960                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
961                         x->genid = xfrm_state_genid;
962         }
963 }
964
965 void xfrm_state_insert(struct xfrm_state *x)
966 {
967         spin_lock_bh(&xfrm_state_lock);
968         __xfrm_state_bump_genids(x);
969         __xfrm_state_insert(x);
970         spin_unlock_bh(&xfrm_state_lock);
971 }
972 EXPORT_SYMBOL(xfrm_state_insert);
973
974 /* xfrm_state_lock is held */
975 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
976 {
977         unsigned int h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family);
978         struct hlist_node *entry;
979         struct xfrm_state *x;
980
981         hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
982                 if (x->props.reqid  != reqid ||
983                     x->props.mode   != mode ||
984                     x->props.family != family ||
985                     x->km.state     != XFRM_STATE_ACQ ||
986                     x->id.spi       != 0 ||
987                     x->id.proto     != proto)
988                         continue;
989
990                 switch (family) {
991                 case AF_INET:
992                         if (x->id.daddr.a4    != daddr->a4 ||
993                             x->props.saddr.a4 != saddr->a4)
994                                 continue;
995                         break;
996                 case AF_INET6:
997                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
998                                              (struct in6_addr *)daddr) ||
999                             !ipv6_addr_equal((struct in6_addr *)
1000                                              x->props.saddr.a6,
1001                                              (struct in6_addr *)saddr))
1002                                 continue;
1003                         break;
1004                 }
1005
1006                 xfrm_state_hold(x);
1007                 return x;
1008         }
1009
1010         if (!create)
1011                 return NULL;
1012
1013         x = xfrm_state_alloc(&init_net);
1014         if (likely(x)) {
1015                 switch (family) {
1016                 case AF_INET:
1017                         x->sel.daddr.a4 = daddr->a4;
1018                         x->sel.saddr.a4 = saddr->a4;
1019                         x->sel.prefixlen_d = 32;
1020                         x->sel.prefixlen_s = 32;
1021                         x->props.saddr.a4 = saddr->a4;
1022                         x->id.daddr.a4 = daddr->a4;
1023                         break;
1024
1025                 case AF_INET6:
1026                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
1027                                        (struct in6_addr *)daddr);
1028                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
1029                                        (struct in6_addr *)saddr);
1030                         x->sel.prefixlen_d = 128;
1031                         x->sel.prefixlen_s = 128;
1032                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
1033                                        (struct in6_addr *)saddr);
1034                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
1035                                        (struct in6_addr *)daddr);
1036                         break;
1037                 }
1038
1039                 x->km.state = XFRM_STATE_ACQ;
1040                 x->id.proto = proto;
1041                 x->props.family = family;
1042                 x->props.mode = mode;
1043                 x->props.reqid = reqid;
1044                 x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
1045                 xfrm_state_hold(x);
1046                 x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
1047                 add_timer(&x->timer);
1048                 list_add(&x->km.all, &init_net.xfrm.state_all);
1049                 hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
1050                 h = xfrm_src_hash(&init_net, daddr, saddr, family);
1051                 hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
1052
1053                 init_net.xfrm.state_num++;
1054
1055                 xfrm_hash_grow_check(&init_net, x->bydst.next != NULL);
1056         }
1057
1058         return x;
1059 }
1060
1061 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
1062
1063 int xfrm_state_add(struct xfrm_state *x)
1064 {
1065         struct xfrm_state *x1, *to_put;
1066         int family;
1067         int err;
1068         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1069
1070         family = x->props.family;
1071
1072         to_put = NULL;
1073
1074         spin_lock_bh(&xfrm_state_lock);
1075
1076         x1 = __xfrm_state_locate(x, use_spi, family);
1077         if (x1) {
1078                 to_put = x1;
1079                 x1 = NULL;
1080                 err = -EEXIST;
1081                 goto out;
1082         }
1083
1084         if (use_spi && x->km.seq) {
1085                 x1 = __xfrm_find_acq_byseq(x->km.seq);
1086                 if (x1 && ((x1->id.proto != x->id.proto) ||
1087                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
1088                         to_put = x1;
1089                         x1 = NULL;
1090                 }
1091         }
1092
1093         if (use_spi && !x1)
1094                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
1095                                      x->id.proto,
1096                                      &x->id.daddr, &x->props.saddr, 0);
1097
1098         __xfrm_state_bump_genids(x);
1099         __xfrm_state_insert(x);
1100         err = 0;
1101
1102 out:
1103         spin_unlock_bh(&xfrm_state_lock);
1104
1105         if (x1) {
1106                 xfrm_state_delete(x1);
1107                 xfrm_state_put(x1);
1108         }
1109
1110         if (to_put)
1111                 xfrm_state_put(to_put);
1112
1113         return err;
1114 }
1115 EXPORT_SYMBOL(xfrm_state_add);
1116
1117 #ifdef CONFIG_XFRM_MIGRATE
1118 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
1119 {
1120         struct net *net = xs_net(orig);
1121         int err = -ENOMEM;
1122         struct xfrm_state *x = xfrm_state_alloc(net);
1123         if (!x)
1124                 goto error;
1125
1126         memcpy(&x->id, &orig->id, sizeof(x->id));
1127         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1128         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1129         x->props.mode = orig->props.mode;
1130         x->props.replay_window = orig->props.replay_window;
1131         x->props.reqid = orig->props.reqid;
1132         x->props.family = orig->props.family;
1133         x->props.saddr = orig->props.saddr;
1134
1135         if (orig->aalg) {
1136                 x->aalg = xfrm_algo_clone(orig->aalg);
1137                 if (!x->aalg)
1138                         goto error;
1139         }
1140         x->props.aalgo = orig->props.aalgo;
1141
1142         if (orig->ealg) {
1143                 x->ealg = xfrm_algo_clone(orig->ealg);
1144                 if (!x->ealg)
1145                         goto error;
1146         }
1147         x->props.ealgo = orig->props.ealgo;
1148
1149         if (orig->calg) {
1150                 x->calg = xfrm_algo_clone(orig->calg);
1151                 if (!x->calg)
1152                         goto error;
1153         }
1154         x->props.calgo = orig->props.calgo;
1155
1156         if (orig->encap) {
1157                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
1158                 if (!x->encap)
1159                         goto error;
1160         }
1161
1162         if (orig->coaddr) {
1163                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1164                                     GFP_KERNEL);
1165                 if (!x->coaddr)
1166                         goto error;
1167         }
1168
1169         err = xfrm_init_state(x);
1170         if (err)
1171                 goto error;
1172
1173         x->props.flags = orig->props.flags;
1174
1175         x->curlft.add_time = orig->curlft.add_time;
1176         x->km.state = orig->km.state;
1177         x->km.seq = orig->km.seq;
1178
1179         return x;
1180
1181  error:
1182         if (errp)
1183                 *errp = err;
1184         if (x) {
1185                 kfree(x->aalg);
1186                 kfree(x->ealg);
1187                 kfree(x->calg);
1188                 kfree(x->encap);
1189                 kfree(x->coaddr);
1190         }
1191         kfree(x);
1192         return NULL;
1193 }
1194
1195 /* xfrm_state_lock is held */
1196 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
1197 {
1198         unsigned int h;
1199         struct xfrm_state *x;
1200         struct hlist_node *entry;
1201
1202         if (m->reqid) {
1203                 h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr,
1204                                   m->reqid, m->old_family);
1205                 hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
1206                         if (x->props.mode != m->mode ||
1207                             x->id.proto != m->proto)
1208                                 continue;
1209                         if (m->reqid && x->props.reqid != m->reqid)
1210                                 continue;
1211                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1212                                           m->old_family) ||
1213                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1214                                           m->old_family))
1215                                 continue;
1216                         xfrm_state_hold(x);
1217                         return x;
1218                 }
1219         } else {
1220                 h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr,
1221                                   m->old_family);
1222                 hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
1223                         if (x->props.mode != m->mode ||
1224                             x->id.proto != m->proto)
1225                                 continue;
1226                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1227                                           m->old_family) ||
1228                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1229                                           m->old_family))
1230                                 continue;
1231                         xfrm_state_hold(x);
1232                         return x;
1233                 }
1234         }
1235
1236         return NULL;
1237 }
1238 EXPORT_SYMBOL(xfrm_migrate_state_find);
1239
1240 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1241                                        struct xfrm_migrate *m)
1242 {
1243         struct xfrm_state *xc;
1244         int err;
1245
1246         xc = xfrm_state_clone(x, &err);
1247         if (!xc)
1248                 return NULL;
1249
1250         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1251         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1252
1253         /* add state */
1254         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1255                 /* a care is needed when the destination address of the
1256                    state is to be updated as it is a part of triplet */
1257                 xfrm_state_insert(xc);
1258         } else {
1259                 if ((err = xfrm_state_add(xc)) < 0)
1260                         goto error;
1261         }
1262
1263         return xc;
1264 error:
1265         kfree(xc);
1266         return NULL;
1267 }
1268 EXPORT_SYMBOL(xfrm_state_migrate);
1269 #endif
1270
1271 int xfrm_state_update(struct xfrm_state *x)
1272 {
1273         struct xfrm_state *x1, *to_put;
1274         int err;
1275         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1276
1277         to_put = NULL;
1278
1279         spin_lock_bh(&xfrm_state_lock);
1280         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1281
1282         err = -ESRCH;
1283         if (!x1)
1284                 goto out;
1285
1286         if (xfrm_state_kern(x1)) {
1287                 to_put = x1;
1288                 err = -EEXIST;
1289                 goto out;
1290         }
1291
1292         if (x1->km.state == XFRM_STATE_ACQ) {
1293                 __xfrm_state_insert(x);
1294                 x = NULL;
1295         }
1296         err = 0;
1297
1298 out:
1299         spin_unlock_bh(&xfrm_state_lock);
1300
1301         if (to_put)
1302                 xfrm_state_put(to_put);
1303
1304         if (err)
1305                 return err;
1306
1307         if (!x) {
1308                 xfrm_state_delete(x1);
1309                 xfrm_state_put(x1);
1310                 return 0;
1311         }
1312
1313         err = -EINVAL;
1314         spin_lock_bh(&x1->lock);
1315         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1316                 if (x->encap && x1->encap)
1317                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1318                 if (x->coaddr && x1->coaddr) {
1319                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1320                 }
1321                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1322                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1323                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1324                 x1->km.dying = 0;
1325
1326                 mod_timer(&x1->timer, jiffies + HZ);
1327                 if (x1->curlft.use_time)
1328                         xfrm_state_check_expire(x1);
1329
1330                 err = 0;
1331         }
1332         spin_unlock_bh(&x1->lock);
1333
1334         xfrm_state_put(x1);
1335
1336         return err;
1337 }
1338 EXPORT_SYMBOL(xfrm_state_update);
1339
1340 int xfrm_state_check_expire(struct xfrm_state *x)
1341 {
1342         if (!x->curlft.use_time)
1343                 x->curlft.use_time = get_seconds();
1344
1345         if (x->km.state != XFRM_STATE_VALID)
1346                 return -EINVAL;
1347
1348         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1349             x->curlft.packets >= x->lft.hard_packet_limit) {
1350                 x->km.state = XFRM_STATE_EXPIRED;
1351                 mod_timer(&x->timer, jiffies);
1352                 return -EINVAL;
1353         }
1354
1355         if (!x->km.dying &&
1356             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1357              x->curlft.packets >= x->lft.soft_packet_limit)) {
1358                 x->km.dying = 1;
1359                 km_state_expired(x, 0, 0);
1360         }
1361         return 0;
1362 }
1363 EXPORT_SYMBOL(xfrm_state_check_expire);
1364
1365 struct xfrm_state *
1366 xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto,
1367                   unsigned short family)
1368 {
1369         struct xfrm_state *x;
1370
1371         spin_lock_bh(&xfrm_state_lock);
1372         x = __xfrm_state_lookup(net, daddr, spi, proto, family);
1373         spin_unlock_bh(&xfrm_state_lock);
1374         return x;
1375 }
1376 EXPORT_SYMBOL(xfrm_state_lookup);
1377
1378 struct xfrm_state *
1379 xfrm_state_lookup_byaddr(struct net *net,
1380                          xfrm_address_t *daddr, xfrm_address_t *saddr,
1381                          u8 proto, unsigned short family)
1382 {
1383         struct xfrm_state *x;
1384
1385         spin_lock_bh(&xfrm_state_lock);
1386         x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family);
1387         spin_unlock_bh(&xfrm_state_lock);
1388         return x;
1389 }
1390 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1391
1392 struct xfrm_state *
1393 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1394               xfrm_address_t *daddr, xfrm_address_t *saddr,
1395               int create, unsigned short family)
1396 {
1397         struct xfrm_state *x;
1398
1399         spin_lock_bh(&xfrm_state_lock);
1400         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1401         spin_unlock_bh(&xfrm_state_lock);
1402
1403         return x;
1404 }
1405 EXPORT_SYMBOL(xfrm_find_acq);
1406
1407 #ifdef CONFIG_XFRM_SUB_POLICY
1408 int
1409 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1410                unsigned short family)
1411 {
1412         int err = 0;
1413         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1414         if (!afinfo)
1415                 return -EAFNOSUPPORT;
1416
1417         spin_lock_bh(&xfrm_state_lock);
1418         if (afinfo->tmpl_sort)
1419                 err = afinfo->tmpl_sort(dst, src, n);
1420         spin_unlock_bh(&xfrm_state_lock);
1421         xfrm_state_put_afinfo(afinfo);
1422         return err;
1423 }
1424 EXPORT_SYMBOL(xfrm_tmpl_sort);
1425
1426 int
1427 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1428                 unsigned short family)
1429 {
1430         int err = 0;
1431         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1432         if (!afinfo)
1433                 return -EAFNOSUPPORT;
1434
1435         spin_lock_bh(&xfrm_state_lock);
1436         if (afinfo->state_sort)
1437                 err = afinfo->state_sort(dst, src, n);
1438         spin_unlock_bh(&xfrm_state_lock);
1439         xfrm_state_put_afinfo(afinfo);
1440         return err;
1441 }
1442 EXPORT_SYMBOL(xfrm_state_sort);
1443 #endif
1444
1445 /* Silly enough, but I'm lazy to build resolution list */
1446
1447 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1448 {
1449         int i;
1450
1451         for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
1452                 struct hlist_node *entry;
1453                 struct xfrm_state *x;
1454
1455                 hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
1456                         if (x->km.seq == seq &&
1457                             x->km.state == XFRM_STATE_ACQ) {
1458                                 xfrm_state_hold(x);
1459                                 return x;
1460                         }
1461                 }
1462         }
1463         return NULL;
1464 }
1465
1466 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1467 {
1468         struct xfrm_state *x;
1469
1470         spin_lock_bh(&xfrm_state_lock);
1471         x = __xfrm_find_acq_byseq(seq);
1472         spin_unlock_bh(&xfrm_state_lock);
1473         return x;
1474 }
1475 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1476
1477 u32 xfrm_get_acqseq(void)
1478 {
1479         u32 res;
1480         static u32 acqseq;
1481         static DEFINE_SPINLOCK(acqseq_lock);
1482
1483         spin_lock_bh(&acqseq_lock);
1484         res = (++acqseq ? : ++acqseq);
1485         spin_unlock_bh(&acqseq_lock);
1486         return res;
1487 }
1488 EXPORT_SYMBOL(xfrm_get_acqseq);
1489
1490 int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1491 {
1492         struct net *net = xs_net(x);
1493         unsigned int h;
1494         struct xfrm_state *x0;
1495         int err = -ENOENT;
1496         __be32 minspi = htonl(low);
1497         __be32 maxspi = htonl(high);
1498
1499         spin_lock_bh(&x->lock);
1500         if (x->km.state == XFRM_STATE_DEAD)
1501                 goto unlock;
1502
1503         err = 0;
1504         if (x->id.spi)
1505                 goto unlock;
1506
1507         err = -ENOENT;
1508
1509         if (minspi == maxspi) {
1510                 x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family);
1511                 if (x0) {
1512                         xfrm_state_put(x0);
1513                         goto unlock;
1514                 }
1515                 x->id.spi = minspi;
1516         } else {
1517                 u32 spi = 0;
1518                 for (h=0; h<high-low+1; h++) {
1519                         spi = low + net_random()%(high-low+1);
1520                         x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1521                         if (x0 == NULL) {
1522                                 x->id.spi = htonl(spi);
1523                                 break;
1524                         }
1525                         xfrm_state_put(x0);
1526                 }
1527         }
1528         if (x->id.spi) {
1529                 spin_lock_bh(&xfrm_state_lock);
1530                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1531                 hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
1532                 spin_unlock_bh(&xfrm_state_lock);
1533
1534                 err = 0;
1535         }
1536
1537 unlock:
1538         spin_unlock_bh(&x->lock);
1539
1540         return err;
1541 }
1542 EXPORT_SYMBOL(xfrm_alloc_spi);
1543
1544 int xfrm_state_walk(struct xfrm_state_walk *walk,
1545                     int (*func)(struct xfrm_state *, int, void*),
1546                     void *data)
1547 {
1548         struct xfrm_state *state;
1549         struct xfrm_state_walk *x;
1550         int err = 0;
1551
1552         if (walk->seq != 0 && list_empty(&walk->all))
1553                 return 0;
1554
1555         spin_lock_bh(&xfrm_state_lock);
1556         if (list_empty(&walk->all))
1557                 x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all);
1558         else
1559                 x = list_entry(&walk->all, struct xfrm_state_walk, all);
1560         list_for_each_entry_from(x, &init_net.xfrm.state_all, all) {
1561                 if (x->state == XFRM_STATE_DEAD)
1562                         continue;
1563                 state = container_of(x, struct xfrm_state, km);
1564                 if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1565                         continue;
1566                 err = func(state, walk->seq, data);
1567                 if (err) {
1568                         list_move_tail(&walk->all, &x->all);
1569                         goto out;
1570                 }
1571                 walk->seq++;
1572         }
1573         if (walk->seq == 0) {
1574                 err = -ENOENT;
1575                 goto out;
1576         }
1577         list_del_init(&walk->all);
1578 out:
1579         spin_unlock_bh(&xfrm_state_lock);
1580         return err;
1581 }
1582 EXPORT_SYMBOL(xfrm_state_walk);
1583
1584 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
1585 {
1586         INIT_LIST_HEAD(&walk->all);
1587         walk->proto = proto;
1588         walk->state = XFRM_STATE_DEAD;
1589         walk->seq = 0;
1590 }
1591 EXPORT_SYMBOL(xfrm_state_walk_init);
1592
1593 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
1594 {
1595         if (list_empty(&walk->all))
1596                 return;
1597
1598         spin_lock_bh(&xfrm_state_lock);
1599         list_del(&walk->all);
1600         spin_lock_bh(&xfrm_state_lock);
1601 }
1602 EXPORT_SYMBOL(xfrm_state_walk_done);
1603
1604
1605 void xfrm_replay_notify(struct xfrm_state *x, int event)
1606 {
1607         struct km_event c;
1608         /* we send notify messages in case
1609          *  1. we updated on of the sequence numbers, and the seqno difference
1610          *     is at least x->replay_maxdiff, in this case we also update the
1611          *     timeout of our timer function
1612          *  2. if x->replay_maxage has elapsed since last update,
1613          *     and there were changes
1614          *
1615          *  The state structure must be locked!
1616          */
1617
1618         switch (event) {
1619         case XFRM_REPLAY_UPDATE:
1620                 if (x->replay_maxdiff &&
1621                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1622                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1623                         if (x->xflags & XFRM_TIME_DEFER)
1624                                 event = XFRM_REPLAY_TIMEOUT;
1625                         else
1626                                 return;
1627                 }
1628
1629                 break;
1630
1631         case XFRM_REPLAY_TIMEOUT:
1632                 if ((x->replay.seq == x->preplay.seq) &&
1633                     (x->replay.bitmap == x->preplay.bitmap) &&
1634                     (x->replay.oseq == x->preplay.oseq)) {
1635                         x->xflags |= XFRM_TIME_DEFER;
1636                         return;
1637                 }
1638
1639                 break;
1640         }
1641
1642         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1643         c.event = XFRM_MSG_NEWAE;
1644         c.data.aevent = event;
1645         km_state_notify(x, &c);
1646
1647         if (x->replay_maxage &&
1648             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1649                 x->xflags &= ~XFRM_TIME_DEFER;
1650 }
1651
1652 static void xfrm_replay_timer_handler(unsigned long data)
1653 {
1654         struct xfrm_state *x = (struct xfrm_state*)data;
1655
1656         spin_lock(&x->lock);
1657
1658         if (x->km.state == XFRM_STATE_VALID) {
1659                 if (xfrm_aevent_is_on())
1660                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1661                 else
1662                         x->xflags |= XFRM_TIME_DEFER;
1663         }
1664
1665         spin_unlock(&x->lock);
1666 }
1667
1668 int xfrm_replay_check(struct xfrm_state *x,
1669                       struct sk_buff *skb, __be32 net_seq)
1670 {
1671         u32 diff;
1672         u32 seq = ntohl(net_seq);
1673
1674         if (unlikely(seq == 0))
1675                 goto err;
1676
1677         if (likely(seq > x->replay.seq))
1678                 return 0;
1679
1680         diff = x->replay.seq - seq;
1681         if (diff >= min_t(unsigned int, x->props.replay_window,
1682                           sizeof(x->replay.bitmap) * 8)) {
1683                 x->stats.replay_window++;
1684                 goto err;
1685         }
1686
1687         if (x->replay.bitmap & (1U << diff)) {
1688                 x->stats.replay++;
1689                 goto err;
1690         }
1691         return 0;
1692
1693 err:
1694         xfrm_audit_state_replay(x, skb, net_seq);
1695         return -EINVAL;
1696 }
1697
1698 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1699 {
1700         u32 diff;
1701         u32 seq = ntohl(net_seq);
1702
1703         if (seq > x->replay.seq) {
1704                 diff = seq - x->replay.seq;
1705                 if (diff < x->props.replay_window)
1706                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1707                 else
1708                         x->replay.bitmap = 1;
1709                 x->replay.seq = seq;
1710         } else {
1711                 diff = x->replay.seq - seq;
1712                 x->replay.bitmap |= (1U << diff);
1713         }
1714
1715         if (xfrm_aevent_is_on())
1716                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1717 }
1718
1719 static LIST_HEAD(xfrm_km_list);
1720 static DEFINE_RWLOCK(xfrm_km_lock);
1721
1722 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1723 {
1724         struct xfrm_mgr *km;
1725
1726         read_lock(&xfrm_km_lock);
1727         list_for_each_entry(km, &xfrm_km_list, list)
1728                 if (km->notify_policy)
1729                         km->notify_policy(xp, dir, c);
1730         read_unlock(&xfrm_km_lock);
1731 }
1732
1733 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1734 {
1735         struct xfrm_mgr *km;
1736         read_lock(&xfrm_km_lock);
1737         list_for_each_entry(km, &xfrm_km_list, list)
1738                 if (km->notify)
1739                         km->notify(x, c);
1740         read_unlock(&xfrm_km_lock);
1741 }
1742
1743 EXPORT_SYMBOL(km_policy_notify);
1744 EXPORT_SYMBOL(km_state_notify);
1745
1746 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1747 {
1748         struct net *net = xs_net(x);
1749         struct km_event c;
1750
1751         c.data.hard = hard;
1752         c.pid = pid;
1753         c.event = XFRM_MSG_EXPIRE;
1754         km_state_notify(x, &c);
1755
1756         if (hard)
1757                 wake_up(&net->xfrm.km_waitq);
1758 }
1759
1760 EXPORT_SYMBOL(km_state_expired);
1761 /*
1762  * We send to all registered managers regardless of failure
1763  * We are happy with one success
1764 */
1765 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1766 {
1767         int err = -EINVAL, acqret;
1768         struct xfrm_mgr *km;
1769
1770         read_lock(&xfrm_km_lock);
1771         list_for_each_entry(km, &xfrm_km_list, list) {
1772                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1773                 if (!acqret)
1774                         err = acqret;
1775         }
1776         read_unlock(&xfrm_km_lock);
1777         return err;
1778 }
1779 EXPORT_SYMBOL(km_query);
1780
1781 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1782 {
1783         int err = -EINVAL;
1784         struct xfrm_mgr *km;
1785
1786         read_lock(&xfrm_km_lock);
1787         list_for_each_entry(km, &xfrm_km_list, list) {
1788                 if (km->new_mapping)
1789                         err = km->new_mapping(x, ipaddr, sport);
1790                 if (!err)
1791                         break;
1792         }
1793         read_unlock(&xfrm_km_lock);
1794         return err;
1795 }
1796 EXPORT_SYMBOL(km_new_mapping);
1797
1798 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1799 {
1800         struct net *net = xp_net(pol);
1801         struct km_event c;
1802
1803         c.data.hard = hard;
1804         c.pid = pid;
1805         c.event = XFRM_MSG_POLEXPIRE;
1806         km_policy_notify(pol, dir, &c);
1807
1808         if (hard)
1809                 wake_up(&net->xfrm.km_waitq);
1810 }
1811 EXPORT_SYMBOL(km_policy_expired);
1812
#ifdef CONFIG_XFRM_MIGRATE
/*
 * Pass a migrate request to every registered key manager that implements
 * the migrate hook.  All such managers are called; the result is 0 if at
 * least one of them returned 0, and -EINVAL otherwise.
 */
int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
	       struct xfrm_migrate *m, int num_migrate,
	       struct xfrm_kmaddress *k)
{
	struct xfrm_mgr *mgr;
	int result = -EINVAL;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(mgr, &xfrm_km_list, list) {
		if (!mgr->migrate)
			continue;
		if (mgr->migrate(sel, dir, type, m, num_migrate, k) == 0)
			result = 0;
	}
	read_unlock(&xfrm_km_lock);

	return result;
}
EXPORT_SYMBOL(km_migrate);
#endif
1835
1836 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1837 {
1838         int err = -EINVAL;
1839         int ret;
1840         struct xfrm_mgr *km;
1841
1842         read_lock(&xfrm_km_lock);
1843         list_for_each_entry(km, &xfrm_km_list, list) {
1844                 if (km->report) {
1845                         ret = km->report(proto, sel, addr);
1846                         if (!ret)
1847                                 err = ret;
1848                 }
1849         }
1850         read_unlock(&xfrm_km_lock);
1851         return err;
1852 }
1853 EXPORT_SYMBOL(km_report);
1854
1855 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1856 {
1857         int err;
1858         u8 *data;
1859         struct xfrm_mgr *km;
1860         struct xfrm_policy *pol = NULL;
1861
1862         if (optlen <= 0 || optlen > PAGE_SIZE)
1863                 return -EMSGSIZE;
1864
1865         data = kmalloc(optlen, GFP_KERNEL);
1866         if (!data)
1867                 return -ENOMEM;
1868
1869         err = -EFAULT;
1870         if (copy_from_user(data, optval, optlen))
1871                 goto out;
1872
1873         err = -EINVAL;
1874         read_lock(&xfrm_km_lock);
1875         list_for_each_entry(km, &xfrm_km_list, list) {
1876                 pol = km->compile_policy(sk, optname, data,
1877                                          optlen, &err);
1878                 if (err >= 0)
1879                         break;
1880         }
1881         read_unlock(&xfrm_km_lock);
1882
1883         if (err >= 0) {
1884                 xfrm_sk_policy_insert(sk, err, pol);
1885                 xfrm_pol_put(pol);
1886                 err = 0;
1887         }
1888
1889 out:
1890         kfree(data);
1891         return err;
1892 }
1893 EXPORT_SYMBOL(xfrm_user_policy);
1894
1895 int xfrm_register_km(struct xfrm_mgr *km)
1896 {
1897         write_lock_bh(&xfrm_km_lock);
1898         list_add_tail(&km->list, &xfrm_km_list);
1899         write_unlock_bh(&xfrm_km_lock);
1900         return 0;
1901 }
1902 EXPORT_SYMBOL(xfrm_register_km);
1903
1904 int xfrm_unregister_km(struct xfrm_mgr *km)
1905 {
1906         write_lock_bh(&xfrm_km_lock);
1907         list_del(&km->list);
1908         write_unlock_bh(&xfrm_km_lock);
1909         return 0;
1910 }
1911 EXPORT_SYMBOL(xfrm_unregister_km);
1912
1913 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1914 {
1915         int err = 0;
1916         if (unlikely(afinfo == NULL))
1917                 return -EINVAL;
1918         if (unlikely(afinfo->family >= NPROTO))
1919                 return -EAFNOSUPPORT;
1920         write_lock_bh(&xfrm_state_afinfo_lock);
1921         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1922                 err = -ENOBUFS;
1923         else
1924                 xfrm_state_afinfo[afinfo->family] = afinfo;
1925         write_unlock_bh(&xfrm_state_afinfo_lock);
1926         return err;
1927 }
1928 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1929
1930 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1931 {
1932         int err = 0;
1933         if (unlikely(afinfo == NULL))
1934                 return -EINVAL;
1935         if (unlikely(afinfo->family >= NPROTO))
1936                 return -EAFNOSUPPORT;
1937         write_lock_bh(&xfrm_state_afinfo_lock);
1938         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1939                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1940                         err = -EINVAL;
1941                 else
1942                         xfrm_state_afinfo[afinfo->family] = NULL;
1943         }
1944         write_unlock_bh(&xfrm_state_afinfo_lock);
1945         return err;
1946 }
1947 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1948
1949 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
1950 {
1951         struct xfrm_state_afinfo *afinfo;
1952         if (unlikely(family >= NPROTO))
1953                 return NULL;
1954         read_lock(&xfrm_state_afinfo_lock);
1955         afinfo = xfrm_state_afinfo[family];
1956         if (unlikely(!afinfo))
1957                 read_unlock(&xfrm_state_afinfo_lock);
1958         return afinfo;
1959 }
1960
/*
 * Counterpart of xfrm_state_get_afinfo(): drops the read side of
 * xfrm_state_afinfo_lock.  @afinfo itself is not touched.
 */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
	__releases(xfrm_state_afinfo_lock)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1966
1967 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1968 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1969 {
1970         if (x->tunnel) {
1971                 struct xfrm_state *t = x->tunnel;
1972
1973                 if (atomic_read(&t->tunnel_users) == 2)
1974                         xfrm_state_delete(t);
1975                 atomic_dec(&t->tunnel_users);
1976                 xfrm_state_put(t);
1977                 x->tunnel = NULL;
1978         }
1979 }
1980 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1981
1982 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1983 {
1984         int res;
1985
1986         spin_lock_bh(&x->lock);
1987         if (x->km.state == XFRM_STATE_VALID &&
1988             x->type && x->type->get_mtu)
1989                 res = x->type->get_mtu(x, mtu);
1990         else
1991                 res = mtu - x->props.header_len;
1992         spin_unlock_bh(&x->lock);
1993         return res;
1994 }
1995
1996 int xfrm_init_state(struct xfrm_state *x)
1997 {
1998         struct xfrm_state_afinfo *afinfo;
1999         struct xfrm_mode *inner_mode;
2000         int family = x->props.family;
2001         int err;
2002
2003         err = -EAFNOSUPPORT;
2004         afinfo = xfrm_state_get_afinfo(family);
2005         if (!afinfo)
2006                 goto error;
2007
2008         err = 0;
2009         if (afinfo->init_flags)
2010                 err = afinfo->init_flags(x);
2011
2012         xfrm_state_put_afinfo(afinfo);
2013
2014         if (err)
2015                 goto error;
2016
2017         err = -EPROTONOSUPPORT;
2018
2019         if (x->sel.family != AF_UNSPEC) {
2020                 inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2021                 if (inner_mode == NULL)
2022                         goto error;
2023
2024                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2025                     family != x->sel.family) {
2026                         xfrm_put_mode(inner_mode);
2027                         goto error;
2028                 }
2029
2030                 x->inner_mode = inner_mode;
2031         } else {
2032                 struct xfrm_mode *inner_mode_iaf;
2033
2034                 inner_mode = xfrm_get_mode(x->props.mode, AF_INET);
2035                 if (inner_mode == NULL)
2036                         goto error;
2037
2038                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2039                         xfrm_put_mode(inner_mode);
2040                         goto error;
2041                 }
2042
2043                 inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6);
2044                 if (inner_mode_iaf == NULL)
2045                         goto error;
2046
2047                 if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) {
2048                         xfrm_put_mode(inner_mode_iaf);
2049                         goto error;
2050                 }
2051
2052                 if (x->props.family == AF_INET) {
2053                         x->inner_mode = inner_mode;
2054                         x->inner_mode_iaf = inner_mode_iaf;
2055                 } else {
2056                         x->inner_mode = inner_mode_iaf;
2057                         x->inner_mode_iaf = inner_mode;
2058                 }
2059         }
2060
2061         x->type = xfrm_get_type(x->id.proto, family);
2062         if (x->type == NULL)
2063                 goto error;
2064
2065         err = x->type->init_state(x);
2066         if (err)
2067                 goto error;
2068
2069         x->outer_mode = xfrm_get_mode(x->props.mode, family);
2070         if (x->outer_mode == NULL)
2071                 goto error;
2072
2073         x->km.state = XFRM_STATE_VALID;
2074
2075 error:
2076         return err;
2077 }
2078
2079 EXPORT_SYMBOL(xfrm_init_state);
2080
2081 int __net_init xfrm_state_init(struct net *net)
2082 {
2083         unsigned int sz;
2084
2085         INIT_LIST_HEAD(&net->xfrm.state_all);
2086
2087         sz = sizeof(struct hlist_head) * 8;
2088
2089         net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2090         if (!net->xfrm.state_bydst)
2091                 goto out_bydst;
2092         net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2093         if (!net->xfrm.state_bysrc)
2094                 goto out_bysrc;
2095         net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2096         if (!net->xfrm.state_byspi)
2097                 goto out_byspi;
2098         net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2099
2100         net->xfrm.state_num = 0;
2101         INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2102         INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
2103         INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
2104         init_waitqueue_head(&net->xfrm.km_waitq);
2105         return 0;
2106
2107 out_byspi:
2108         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2109 out_bysrc:
2110         xfrm_hash_free(net->xfrm.state_bydst, sz);
2111 out_bydst:
2112         return -ENOMEM;
2113 }
2114
2115 void xfrm_state_fini(struct net *net)
2116 {
2117         unsigned int sz;
2118
2119         WARN_ON(!list_empty(&net->xfrm.state_all));
2120
2121         sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2122         WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2123         xfrm_hash_free(net->xfrm.state_byspi, sz);
2124         WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2125         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2126         WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2127         xfrm_hash_free(net->xfrm.state_bydst, sz);
2128 }
2129
2130 #ifdef CONFIG_AUDITSYSCALL
2131 static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2132                                      struct audit_buffer *audit_buf)
2133 {
2134         struct xfrm_sec_ctx *ctx = x->security;
2135         u32 spi = ntohl(x->id.spi);
2136
2137         if (ctx)
2138                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2139                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2140
2141         switch(x->props.family) {
2142         case AF_INET:
2143                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2144                                  &x->props.saddr.a4, &x->id.daddr.a4);
2145                 break;
2146         case AF_INET6:
2147                 audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2148                                  x->props.saddr.a6, x->id.daddr.a6);
2149                 break;
2150         }
2151
2152         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2153 }
2154
2155 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2156                                       struct audit_buffer *audit_buf)
2157 {
2158         struct iphdr *iph4;
2159         struct ipv6hdr *iph6;
2160
2161         switch (family) {
2162         case AF_INET:
2163                 iph4 = ip_hdr(skb);
2164                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2165                                  &iph4->saddr, &iph4->daddr);
2166                 break;
2167         case AF_INET6:
2168                 iph6 = ipv6_hdr(skb);
2169                 audit_log_format(audit_buf,
2170                                  " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2171                                  &iph6->saddr,&iph6->daddr,
2172                                  iph6->flow_lbl[0] & 0x0f,
2173                                  iph6->flow_lbl[1],
2174                                  iph6->flow_lbl[2]);
2175                 break;
2176         }
2177 }
2178
2179 void xfrm_audit_state_add(struct xfrm_state *x, int result,
2180                           uid_t auid, u32 sessionid, u32 secid)
2181 {
2182         struct audit_buffer *audit_buf;
2183
2184         audit_buf = xfrm_audit_start("SAD-add");
2185         if (audit_buf == NULL)
2186                 return;
2187         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2188         xfrm_audit_helper_sainfo(x, audit_buf);
2189         audit_log_format(audit_buf, " res=%u", result);
2190         audit_log_end(audit_buf);
2191 }
2192 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2193
2194 void xfrm_audit_state_delete(struct xfrm_state *x, int result,
2195                              uid_t auid, u32 sessionid, u32 secid)
2196 {
2197         struct audit_buffer *audit_buf;
2198
2199         audit_buf = xfrm_audit_start("SAD-delete");
2200         if (audit_buf == NULL)
2201                 return;
2202         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2203         xfrm_audit_helper_sainfo(x, audit_buf);
2204         audit_log_format(audit_buf, " res=%u", result);
2205         audit_log_end(audit_buf);
2206 }
2207 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2208
2209 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2210                                       struct sk_buff *skb)
2211 {
2212         struct audit_buffer *audit_buf;
2213         u32 spi;
2214
2215         audit_buf = xfrm_audit_start("SA-replay-overflow");
2216         if (audit_buf == NULL)
2217                 return;
2218         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2219         /* don't record the sequence number because it's inherent in this kind
2220          * of audit message */
2221         spi = ntohl(x->id.spi);
2222         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2223         audit_log_end(audit_buf);
2224 }
2225 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2226
2227 static void xfrm_audit_state_replay(struct xfrm_state *x,
2228                              struct sk_buff *skb, __be32 net_seq)
2229 {
2230         struct audit_buffer *audit_buf;
2231         u32 spi;
2232
2233         audit_buf = xfrm_audit_start("SA-replayed-pkt");
2234         if (audit_buf == NULL)
2235                 return;
2236         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2237         spi = ntohl(x->id.spi);
2238         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2239                          spi, spi, ntohl(net_seq));
2240         audit_log_end(audit_buf);
2241 }
2242
2243 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2244 {
2245         struct audit_buffer *audit_buf;
2246
2247         audit_buf = xfrm_audit_start("SA-notfound");
2248         if (audit_buf == NULL)
2249                 return;
2250         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2251         audit_log_end(audit_buf);
2252 }
2253 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2254
2255 void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2256                                __be32 net_spi, __be32 net_seq)
2257 {
2258         struct audit_buffer *audit_buf;
2259         u32 spi;
2260
2261         audit_buf = xfrm_audit_start("SA-notfound");
2262         if (audit_buf == NULL)
2263                 return;
2264         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2265         spi = ntohl(net_spi);
2266         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2267                          spi, spi, ntohl(net_seq));
2268         audit_log_end(audit_buf);
2269 }
2270 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2271
2272 void xfrm_audit_state_icvfail(struct xfrm_state *x,
2273                               struct sk_buff *skb, u8 proto)
2274 {
2275         struct audit_buffer *audit_buf;
2276         __be32 net_spi;
2277         __be32 net_seq;
2278
2279         audit_buf = xfrm_audit_start("SA-icv-failure");
2280         if (audit_buf == NULL)
2281                 return;
2282         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2283         if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2284                 u32 spi = ntohl(net_spi);
2285                 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2286                                  spi, spi, ntohl(net_seq));
2287         }
2288         audit_log_end(audit_buf);
2289 }
2290 EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2291 #endif /* CONFIG_AUDITSYSCALL */