audit: Add auditing to ipsec
[safe/jmp/linux-2.6] net/xfrm/xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24
25 #include "xfrm_hash.h"
26
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
36 /* Each xfrm_state may be linked to three hash tables:
37
38    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
39    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
40       destination/tunnel endpoint. (output)
41    3. Hash table by (daddr,saddr,family) to look up SAs by address pair. */
42
43 static DEFINE_SPINLOCK(xfrm_state_lock);
44
45 /* Hash table to find appropriate SA towards given target (endpoint
46  * of tunnel or destination of transport mode) allowed by selector.
47  *
48  * Main use is finding SA after policy selected tunnel or transport mode.
49  * Also, it can be used by ah/esp icmp error handler to find offending SA.
50  */
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
54 static unsigned int xfrm_state_hmask __read_mostly;
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
56 static unsigned int xfrm_state_num;
57 static unsigned int xfrm_state_genid;
58
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60                                          xfrm_address_t *saddr,
61                                          u32 reqid,
62                                          unsigned short family)
63 {
64         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
65 }
66
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68                                          xfrm_address_t *saddr,
69                                          unsigned short family)
70 {
71         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
72 }
73
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
76 {
77         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
78 }
79
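/* Move every state on 'list' into the new bydst/bysrc/byspi tables,
 * rehashing with the new mask; only states that already have an SPI
 * are linked into the byspi table.
 */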
80 static void xfrm_hash_transfer(struct hlist_head *list,
81                                struct hlist_head *ndsttable,
82                                struct hlist_head *nsrctable,
83                                struct hlist_head *nspitable,
84                                unsigned int nhashmask)
85 {
86         struct hlist_node *entry, *tmp;
87         struct xfrm_state *x;
88
89         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
90                 unsigned int h;
91
92                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93                                     x->props.reqid, x->props.family,
94                                     nhashmask);
95                 hlist_add_head(&x->bydst, ndsttable+h);
96
97                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
98                                     x->props.family,
99                                     nhashmask);
100                 hlist_add_head(&x->bysrc, nsrctable+h);
101
102                 if (x->id.spi) {
103                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104                                             x->id.proto, x->props.family,
105                                             nhashmask);
106                         hlist_add_head(&x->byspi, nspitable+h);
107                 }
108         }
109 }
110
111 static unsigned long xfrm_hash_new_size(void)
112 {
113         return ((xfrm_state_hmask + 1) << 1) *
114                 sizeof(struct hlist_head);
115 }
116
117 static DEFINE_MUTEX(hash_resize_mutex);
118
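/* Double the size of the state hash tables.  The new tables are allocated
 * before xfrm_state_lock is taken; the lock is held only while entries are
 * transferred and the table pointers and mask are swapped.
 */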
119 static void xfrm_hash_resize(struct work_struct *__unused)
120 {
121         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122         unsigned long nsize, osize;
123         unsigned int nhashmask, ohashmask;
124         int i;
125
126         mutex_lock(&hash_resize_mutex);
127
128         nsize = xfrm_hash_new_size();
129         ndst = xfrm_hash_alloc(nsize);
130         if (!ndst)
131                 goto out_unlock;
132         nsrc = xfrm_hash_alloc(nsize);
133         if (!nsrc) {
134                 xfrm_hash_free(ndst, nsize);
135                 goto out_unlock;
136         }
137         nspi = xfrm_hash_alloc(nsize);
138         if (!nspi) {
139                 xfrm_hash_free(ndst, nsize);
140                 xfrm_hash_free(nsrc, nsize);
141                 goto out_unlock;
142         }
143
144         spin_lock_bh(&xfrm_state_lock);
145
146         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147         for (i = xfrm_state_hmask; i >= 0; i--)
148                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
149                                    nhashmask);
150
151         odst = xfrm_state_bydst;
152         osrc = xfrm_state_bysrc;
153         ospi = xfrm_state_byspi;
154         ohashmask = xfrm_state_hmask;
155
156         xfrm_state_bydst = ndst;
157         xfrm_state_bysrc = nsrc;
158         xfrm_state_byspi = nspi;
159         xfrm_state_hmask = nhashmask;
160
161         spin_unlock_bh(&xfrm_state_lock);
162
163         osize = (ohashmask + 1) * sizeof(struct hlist_head);
164         xfrm_hash_free(odst, osize);
165         xfrm_hash_free(osrc, osize);
166         xfrm_hash_free(ospi, osize);
167
168 out_unlock:
169         mutex_unlock(&hash_resize_mutex);
170 }
171
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
173
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
176
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
179
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
183
184 int __xfrm_state_delete(struct xfrm_state *x);
185
186 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
187 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
188
189 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
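/* Final teardown of a state, run from the GC work queue: stop both timers,
 * free the algorithm, encap and care-of data, release the mode, type and
 * security state, then free the state itself.
 */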
192 static void xfrm_state_gc_destroy(struct xfrm_state *x)
193 {
194         del_timer_sync(&x->timer);
195         del_timer_sync(&x->rtimer);
196         kfree(x->aalg);
197         kfree(x->ealg);
198         kfree(x->calg);
199         kfree(x->encap);
200         kfree(x->coaddr);
201         if (x->mode)
202                 xfrm_put_mode(x->mode);
203         if (x->type) {
204                 x->type->destructor(x);
205                 xfrm_put_type(x->type);
206         }
207         security_xfrm_state_free(x);
208         kfree(x);
209 }
210
211 static void xfrm_state_gc_task(struct work_struct *data)
212 {
213         struct xfrm_state *x;
214         struct hlist_node *entry, *tmp;
215         struct hlist_head gc_list;
216
217         spin_lock_bh(&xfrm_state_gc_lock);
218         gc_list.first = xfrm_state_gc_list.first;
219         INIT_HLIST_HEAD(&xfrm_state_gc_list);
220         spin_unlock_bh(&xfrm_state_gc_lock);
221
222         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
223                 xfrm_state_gc_destroy(x);
224
225         wake_up(&km_waitq);
226 }
227
228 static inline unsigned long make_jiffies(long secs)
229 {
230         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231                 return MAX_SCHEDULE_TIMEOUT-1;
232         else
233                 return secs*HZ;
234 }
235
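/* Per-state lifetime timer.  Checks the hard and soft add/use expiry times,
 * notifies the key manager on soft expiry, and on hard expiry deletes the
 * state and writes an AUDIT_MAC_IPSEC_DELSA record with the result.
 */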
236 static void xfrm_timer_handler(unsigned long data)
237 {
238         struct xfrm_state *x = (struct xfrm_state*)data;
239         unsigned long now = (unsigned long)xtime.tv_sec;
240         long next = LONG_MAX;
241         int warn = 0;
242         int err = 0;
243
244         spin_lock(&x->lock);
245         if (x->km.state == XFRM_STATE_DEAD)
246                 goto out;
247         if (x->km.state == XFRM_STATE_EXPIRED)
248                 goto expired;
249         if (x->lft.hard_add_expires_seconds) {
250                 long tmo = x->lft.hard_add_expires_seconds +
251                         x->curlft.add_time - now;
252                 if (tmo <= 0)
253                         goto expired;
254                 if (tmo < next)
255                         next = tmo;
256         }
257         if (x->lft.hard_use_expires_seconds) {
258                 long tmo = x->lft.hard_use_expires_seconds +
259                         (x->curlft.use_time ? : now) - now;
260                 if (tmo <= 0)
261                         goto expired;
262                 if (tmo < next)
263                         next = tmo;
264         }
265         if (x->km.dying)
266                 goto resched;
267         if (x->lft.soft_add_expires_seconds) {
268                 long tmo = x->lft.soft_add_expires_seconds +
269                         x->curlft.add_time - now;
270                 if (tmo <= 0)
271                         warn = 1;
272                 else if (tmo < next)
273                         next = tmo;
274         }
275         if (x->lft.soft_use_expires_seconds) {
276                 long tmo = x->lft.soft_use_expires_seconds +
277                         (x->curlft.use_time ? : now) - now;
278                 if (tmo <= 0)
279                         warn = 1;
280                 else if (tmo < next)
281                         next = tmo;
282         }
283
284         x->km.dying = warn;
285         if (warn)
286                 km_state_expired(x, 0, 0);
287 resched:
288         if (next != LONG_MAX)
289                 mod_timer(&x->timer, jiffies + make_jiffies(next));
290
291         goto out;
292
293 expired:
294         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
295                 x->km.state = XFRM_STATE_EXPIRED;
296                 wake_up(&km_waitq);
297                 next = 2;
298                 goto resched;
299         }
300
301         err = __xfrm_state_delete(x);
302         if (!err && x->id.spi)
303                 km_state_expired(x, 1, 0);
304
305         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
306                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
307
308 out:
309         spin_unlock(&x->lock);
310 }
311
312 static void xfrm_replay_timer_handler(unsigned long data);
313
314 struct xfrm_state *xfrm_state_alloc(void)
315 {
316         struct xfrm_state *x;
317
318         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
319
320         if (x) {
321                 atomic_set(&x->refcnt, 1);
322                 atomic_set(&x->tunnel_users, 0);
323                 INIT_HLIST_NODE(&x->bydst);
324                 INIT_HLIST_NODE(&x->bysrc);
325                 INIT_HLIST_NODE(&x->byspi);
326                 init_timer(&x->timer);
327                 x->timer.function = xfrm_timer_handler;
328                 x->timer.data     = (unsigned long)x;
329                 init_timer(&x->rtimer);
330                 x->rtimer.function = xfrm_replay_timer_handler;
331                 x->rtimer.data     = (unsigned long)x;
332                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
333                 x->lft.soft_byte_limit = XFRM_INF;
334                 x->lft.soft_packet_limit = XFRM_INF;
335                 x->lft.hard_byte_limit = XFRM_INF;
336                 x->lft.hard_packet_limit = XFRM_INF;
337                 x->replay_maxage = 0;
338                 x->replay_maxdiff = 0;
339                 spin_lock_init(&x->lock);
340         }
341         return x;
342 }
343 EXPORT_SYMBOL(xfrm_state_alloc);
344
345 void __xfrm_state_destroy(struct xfrm_state *x)
346 {
347         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
348
349         spin_lock_bh(&xfrm_state_gc_lock);
350         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
351         spin_unlock_bh(&xfrm_state_gc_lock);
352         schedule_work(&xfrm_state_gc_work);
353 }
354 EXPORT_SYMBOL(__xfrm_state_destroy);
355
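/* Mark a state dead, unhash it and drop the reference taken by
 * xfrm_state_alloc().  Returns 0 on success or -ESRCH if the state was
 * already dead.  Callers in this file hold x->lock.
 */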
356 int __xfrm_state_delete(struct xfrm_state *x)
357 {
358         int err = -ESRCH;
359
360         if (x->km.state != XFRM_STATE_DEAD) {
361                 x->km.state = XFRM_STATE_DEAD;
362                 spin_lock(&xfrm_state_lock);
363                 hlist_del(&x->bydst);
364                 hlist_del(&x->bysrc);
365                 if (x->id.spi)
366                         hlist_del(&x->byspi);
367                 xfrm_state_num--;
368                 spin_unlock(&xfrm_state_lock);
369
370                 /* All xfrm_state objects are created by xfrm_state_alloc.
371                  * The xfrm_state_alloc call gives a reference, and that
372                  * is what we are dropping here.
373                  */
374                 __xfrm_state_put(x);
375                 err = 0;
376         }
377
378         return err;
379 }
380 EXPORT_SYMBOL(__xfrm_state_delete);
381
382 int xfrm_state_delete(struct xfrm_state *x)
383 {
384         int err;
385
386         spin_lock_bh(&x->lock);
387         err = __xfrm_state_delete(x);
388         spin_unlock_bh(&x->lock);
389
390         return err;
391 }
392 EXPORT_SYMBOL(xfrm_state_delete);
393
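/* Delete every SA whose protocol matches 'proto', skipping kernel-held
 * states, and write one AUDIT_MAC_IPSEC_DELSA record per deletion using
 * the loginuid/secid supplied in 'audit_info'.
 */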
394 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
395 {
396         int i;
397         int err = 0;
398
399         spin_lock_bh(&xfrm_state_lock);
400         for (i = 0; i <= xfrm_state_hmask; i++) {
401                 struct hlist_node *entry;
402                 struct xfrm_state *x;
403 restart:
404                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
405                         if (!xfrm_state_kern(x) &&
406                             xfrm_id_proto_match(x->id.proto, proto)) {
407                                 xfrm_state_hold(x);
408                                 spin_unlock_bh(&xfrm_state_lock);
409
411                                 err = xfrm_state_delete(x);
412                                 xfrm_audit_log(audit_info->loginuid,
413                                                audit_info->secid,
414                                                AUDIT_MAC_IPSEC_DELSA,
415                                                err ? 0 : 1, NULL, x);
416                                 xfrm_state_put(x);
417
418                                 spin_lock_bh(&xfrm_state_lock);
419                                 goto restart;
420                         }
421                 }
422         }
423         spin_unlock_bh(&xfrm_state_lock);
424         wake_up(&km_waitq);
425 }
426 EXPORT_SYMBOL(xfrm_state_flush);
427
428 static int
429 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
430                   struct xfrm_tmpl *tmpl,
431                   xfrm_address_t *daddr, xfrm_address_t *saddr,
432                   unsigned short family)
433 {
434         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
435         if (!afinfo)
436                 return -1;
437         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
438         xfrm_state_put_afinfo(afinfo);
439         return 0;
440 }
441
442 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
443 {
444         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
445         struct xfrm_state *x;
446         struct hlist_node *entry;
447
448         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
449                 if (x->props.family != family ||
450                     x->id.spi       != spi ||
451                     x->id.proto     != proto)
452                         continue;
453
454                 switch (family) {
455                 case AF_INET:
456                         if (x->id.daddr.a4 != daddr->a4)
457                                 continue;
458                         break;
459                 case AF_INET6:
460                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
461                                              (struct in6_addr *)
462                                              x->id.daddr.a6))
463                                 continue;
464                         break;
465                 };
466
467                 xfrm_state_hold(x);
468                 return x;
469         }
470
471         return NULL;
472 }
473
474 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
475 {
476         unsigned int h = xfrm_src_hash(daddr, saddr, family);
477         struct xfrm_state *x;
478         struct hlist_node *entry;
479
480         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
481                 if (x->props.family != family ||
482                     x->id.proto     != proto)
483                         continue;
484
485                 switch (family) {
486                 case AF_INET:
487                         if (x->id.daddr.a4 != daddr->a4 ||
488                             x->props.saddr.a4 != saddr->a4)
489                                 continue;
490                         break;
491                 case AF_INET6:
492                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
493                                              (struct in6_addr *)
494                                              x->id.daddr.a6) ||
495                             !ipv6_addr_equal((struct in6_addr *)saddr,
496                                              (struct in6_addr *)
497                                              x->props.saddr.a6))
498                                 continue;
499                         break;
500                 };
501
502                 xfrm_state_hold(x);
503                 return x;
504         }
505
506         return NULL;
507 }
508
509 static inline struct xfrm_state *
510 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
511 {
512         if (use_spi)
513                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
514                                            x->id.proto, family);
515         else
516                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
517                                                   &x->props.saddr,
518                                                   x->id.proto, family);
519 }
520
521 static void xfrm_hash_grow_check(int have_hash_collision)
522 {
523         if (have_hash_collision &&
524             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
525             xfrm_state_num > xfrm_state_hmask)
526                 schedule_work(&xfrm_hash_work);
527 }
528
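/* Main output-path lookup: walk the bydst chain for (daddr, saddr, reqid)
 * and choose the best VALID state matching the template, flow and policy.
 * If nothing matches and no acquire is in progress, create a temporary
 * XFRM_STATE_ACQ entry and ask the key managers to negotiate a real SA.
 */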
529 struct xfrm_state *
530 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
531                 struct flowi *fl, struct xfrm_tmpl *tmpl,
532                 struct xfrm_policy *pol, int *err,
533                 unsigned short family)
534 {
535         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
536         struct hlist_node *entry;
537         struct xfrm_state *x, *x0;
538         int acquire_in_progress = 0;
539         int error = 0;
540         struct xfrm_state *best = NULL;
541         
542         spin_lock_bh(&xfrm_state_lock);
543         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
544                 if (x->props.family == family &&
545                     x->props.reqid == tmpl->reqid &&
546                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
547                     xfrm_state_addr_check(x, daddr, saddr, family) &&
548                     tmpl->mode == x->props.mode &&
549                     tmpl->id.proto == x->id.proto &&
550                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
551                         /* Resolution logic:
552                            1. There is a valid state with matching selector.
553                               Done.
554                            2. Valid state with inappropriate selector. Skip.
555
556                            Entering area of "sysdeps".
557
558                            3. If the state is not valid, the selector is
559                               temporary; it selects only the session which
560                               triggered the previous resolution. The key
561                               manager will do something to install a state
562                               with the proper selector.
563                          */
564                         if (x->km.state == XFRM_STATE_VALID) {
565                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
566                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
567                                         continue;
568                                 if (!best ||
569                                     best->km.dying > x->km.dying ||
570                                     (best->km.dying == x->km.dying &&
571                                      best->curlft.add_time < x->curlft.add_time))
572                                         best = x;
573                         } else if (x->km.state == XFRM_STATE_ACQ) {
574                                 acquire_in_progress = 1;
575                         } else if (x->km.state == XFRM_STATE_ERROR ||
576                                    x->km.state == XFRM_STATE_EXPIRED) {
577                                 if (xfrm_selector_match(&x->sel, fl, family) &&
578                                     security_xfrm_state_pol_flow_match(x, pol, fl))
579                                         error = -ESRCH;
580                         }
581                 }
582         }
583
584         x = best;
585         if (!x && !error && !acquire_in_progress) {
586                 if (tmpl->id.spi &&
587                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
588                                               tmpl->id.proto, family)) != NULL) {
589                         xfrm_state_put(x0);
590                         error = -EEXIST;
591                         goto out;
592                 }
593                 x = xfrm_state_alloc();
594                 if (x == NULL) {
595                         error = -ENOMEM;
596                         goto out;
597                 }
598                 /* Initialize temporary selector matching only
599                  * to current session. */
600                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
601
602                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
603                 if (error) {
604                         x->km.state = XFRM_STATE_DEAD;
605                         xfrm_state_put(x);
606                         x = NULL;
607                         goto out;
608                 }
609
610                 if (km_query(x, tmpl, pol) == 0) {
611                         x->km.state = XFRM_STATE_ACQ;
612                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
613                         h = xfrm_src_hash(daddr, saddr, family);
614                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
615                         if (x->id.spi) {
616                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
617                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
618                         }
619                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
620                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
621                         add_timer(&x->timer);
622                         xfrm_state_num++;
623                         xfrm_hash_grow_check(x->bydst.next != NULL);
624                 } else {
625                         x->km.state = XFRM_STATE_DEAD;
626                         xfrm_state_put(x);
627                         x = NULL;
628                         error = -ESRCH;
629                 }
630         }
631 out:
632         if (x)
633                 xfrm_state_hold(x);
634         else
635                 *err = acquire_in_progress ? -EAGAIN : error;
636         spin_unlock_bh(&xfrm_state_lock);
637         return x;
638 }
639
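/* Link a state into the bydst/bysrc/byspi tables, give it a fresh genid,
 * arm its timers and wake any waiters.  Called with xfrm_state_lock held.
 */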
640 static void __xfrm_state_insert(struct xfrm_state *x)
641 {
642         unsigned int h;
643
644         x->genid = ++xfrm_state_genid;
645
646         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
647                           x->props.reqid, x->props.family);
648         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
649
650         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
651         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
652
653         if (x->id.spi) {
654                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
655                                   x->props.family);
656
657                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
658         }
659
660         mod_timer(&x->timer, jiffies + HZ);
661         if (x->replay_maxage)
662                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
663
664         wake_up(&km_waitq);
665
666         xfrm_state_num++;
667
668         xfrm_hash_grow_check(x->bydst.next != NULL);
669 }
670
671 /* xfrm_state_lock is held */
672 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
673 {
674         unsigned short family = xnew->props.family;
675         u32 reqid = xnew->props.reqid;
676         struct xfrm_state *x;
677         struct hlist_node *entry;
678         unsigned int h;
679
680         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
681         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
682                 if (x->props.family     == family &&
683                     x->props.reqid      == reqid &&
684                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
685                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
686                         x->genid = xfrm_state_genid;
687         }
688 }
689
690 void xfrm_state_insert(struct xfrm_state *x)
691 {
692         spin_lock_bh(&xfrm_state_lock);
693         __xfrm_state_bump_genids(x);
694         __xfrm_state_insert(x);
695         spin_unlock_bh(&xfrm_state_lock);
696 }
697 EXPORT_SYMBOL(xfrm_state_insert);
698
699 /* xfrm_state_lock is held */
700 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
701 {
702         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
703         struct hlist_node *entry;
704         struct xfrm_state *x;
705
706         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
707                 if (x->props.reqid  != reqid ||
708                     x->props.mode   != mode ||
709                     x->props.family != family ||
710                     x->km.state     != XFRM_STATE_ACQ ||
711                     x->id.spi       != 0)
712                         continue;
713
714                 switch (family) {
715                 case AF_INET:
716                         if (x->id.daddr.a4    != daddr->a4 ||
717                             x->props.saddr.a4 != saddr->a4)
718                                 continue;
719                         break;
720                 case AF_INET6:
721                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
722                                              (struct in6_addr *)daddr) ||
723                             !ipv6_addr_equal((struct in6_addr *)
724                                              x->props.saddr.a6,
725                                              (struct in6_addr *)saddr))
726                                 continue;
727                         break;
728                 };
729
730                 xfrm_state_hold(x);
731                 return x;
732         }
733
734         if (!create)
735                 return NULL;
736
737         x = xfrm_state_alloc();
738         if (likely(x)) {
739                 switch (family) {
740                 case AF_INET:
741                         x->sel.daddr.a4 = daddr->a4;
742                         x->sel.saddr.a4 = saddr->a4;
743                         x->sel.prefixlen_d = 32;
744                         x->sel.prefixlen_s = 32;
745                         x->props.saddr.a4 = saddr->a4;
746                         x->id.daddr.a4 = daddr->a4;
747                         break;
748
749                 case AF_INET6:
750                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
751                                        (struct in6_addr *)daddr);
752                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
753                                        (struct in6_addr *)saddr);
754                         x->sel.prefixlen_d = 128;
755                         x->sel.prefixlen_s = 128;
756                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
757                                        (struct in6_addr *)saddr);
758                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
759                                        (struct in6_addr *)daddr);
760                         break;
761                 };
762
763                 x->km.state = XFRM_STATE_ACQ;
764                 x->id.proto = proto;
765                 x->props.family = family;
766                 x->props.mode = mode;
767                 x->props.reqid = reqid;
768                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
769                 xfrm_state_hold(x);
770                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
771                 add_timer(&x->timer);
772                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
773                 h = xfrm_src_hash(daddr, saddr, family);
774                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
775                 wake_up(&km_waitq);
776
777                 xfrm_state_num++;
778
779                 xfrm_hash_grow_check(x->bydst.next != NULL);
780         }
781
782         return x;
783 }
784
785 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
786
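/* Add a fully specified state.  Fails with -EEXIST if an equivalent state
 * is already installed; otherwise the state is inserted and any matching
 * XFRM_STATE_ACQ placeholder (found by km.seq or by address/reqid) is
 * deleted afterwards.
 */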
787 int xfrm_state_add(struct xfrm_state *x)
788 {
789         struct xfrm_state *x1;
790         int family;
791         int err;
792         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
793
794         family = x->props.family;
795
796         spin_lock_bh(&xfrm_state_lock);
797
798         x1 = __xfrm_state_locate(x, use_spi, family);
799         if (x1) {
800                 xfrm_state_put(x1);
801                 x1 = NULL;
802                 err = -EEXIST;
803                 goto out;
804         }
805
806         if (use_spi && x->km.seq) {
807                 x1 = __xfrm_find_acq_byseq(x->km.seq);
808                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
809                         xfrm_state_put(x1);
810                         x1 = NULL;
811                 }
812         }
813
814         if (use_spi && !x1)
815                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
816                                      x->id.proto,
817                                      &x->id.daddr, &x->props.saddr, 0);
818
819         __xfrm_state_bump_genids(x);
820         __xfrm_state_insert(x);
821         err = 0;
822
823 out:
824         spin_unlock_bh(&xfrm_state_lock);
825
826         if (x1) {
827                 xfrm_state_delete(x1);
828                 xfrm_state_put(x1);
829         }
830
831         return err;
832 }
833 EXPORT_SYMBOL(xfrm_state_add);
834
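/* Update an existing state.  If the installed peer is only an ACQ
 * placeholder, the new state replaces it; otherwise its encapsulation,
 * care-of address, selector and lifetimes are refreshed from the new
 * state and its timer is rearmed.
 */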
835 int xfrm_state_update(struct xfrm_state *x)
836 {
837         struct xfrm_state *x1;
838         int err;
839         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
840
841         spin_lock_bh(&xfrm_state_lock);
842         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
843
844         err = -ESRCH;
845         if (!x1)
846                 goto out;
847
848         if (xfrm_state_kern(x1)) {
849                 xfrm_state_put(x1);
850                 err = -EEXIST;
851                 goto out;
852         }
853
854         if (x1->km.state == XFRM_STATE_ACQ) {
855                 __xfrm_state_insert(x);
856                 x = NULL;
857         }
858         err = 0;
859
860 out:
861         spin_unlock_bh(&xfrm_state_lock);
862
863         if (err)
864                 return err;
865
866         if (!x) {
867                 xfrm_state_delete(x1);
868                 xfrm_state_put(x1);
869                 return 0;
870         }
871
872         err = -EINVAL;
873         spin_lock_bh(&x1->lock);
874         if (likely(x1->km.state == XFRM_STATE_VALID)) {
875                 if (x->encap && x1->encap)
876                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
877                 if (x->coaddr && x1->coaddr) {
878                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
879                 }
880                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
881                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
882                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
883                 x1->km.dying = 0;
884
885                 mod_timer(&x1->timer, jiffies + HZ);
886                 if (x1->curlft.use_time)
887                         xfrm_state_check_expire(x1);
888
889                 err = 0;
890         }
891         spin_unlock_bh(&x1->lock);
892
893         xfrm_state_put(x1);
894
895         return err;
896 }
897 EXPORT_SYMBOL(xfrm_state_update);
898
899 int xfrm_state_check_expire(struct xfrm_state *x)
900 {
901         if (!x->curlft.use_time)
902                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
903
904         if (x->km.state != XFRM_STATE_VALID)
905                 return -EINVAL;
906
907         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
908             x->curlft.packets >= x->lft.hard_packet_limit) {
909                 x->km.state = XFRM_STATE_EXPIRED;
910                 mod_timer(&x->timer, jiffies);
911                 return -EINVAL;
912         }
913
914         if (!x->km.dying &&
915             (x->curlft.bytes >= x->lft.soft_byte_limit ||
916              x->curlft.packets >= x->lft.soft_packet_limit)) {
917                 x->km.dying = 1;
918                 km_state_expired(x, 0, 0);
919         }
920         return 0;
921 }
922 EXPORT_SYMBOL(xfrm_state_check_expire);
923
924 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
925 {
926         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
927                 - skb_headroom(skb);
928
929         if (nhead > 0)
930                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
931
932         /* Check tail too... */
933         return 0;
934 }
935
936 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
937 {
938         int err = xfrm_state_check_expire(x);
939         if (err < 0)
940                 goto err;
941         err = xfrm_state_check_space(x, skb);
942 err:
943         return err;
944 }
945 EXPORT_SYMBOL(xfrm_state_check);
946
947 struct xfrm_state *
948 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
949                   unsigned short family)
950 {
951         struct xfrm_state *x;
952
953         spin_lock_bh(&xfrm_state_lock);
954         x = __xfrm_state_lookup(daddr, spi, proto, family);
955         spin_unlock_bh(&xfrm_state_lock);
956         return x;
957 }
958 EXPORT_SYMBOL(xfrm_state_lookup);
959
960 struct xfrm_state *
961 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
962                          u8 proto, unsigned short family)
963 {
964         struct xfrm_state *x;
965
966         spin_lock_bh(&xfrm_state_lock);
967         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
968         spin_unlock_bh(&xfrm_state_lock);
969         return x;
970 }
971 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
972
973 struct xfrm_state *
974 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
975               xfrm_address_t *daddr, xfrm_address_t *saddr, 
976               int create, unsigned short family)
977 {
978         struct xfrm_state *x;
979
980         spin_lock_bh(&xfrm_state_lock);
981         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
982         spin_unlock_bh(&xfrm_state_lock);
983
984         return x;
985 }
986 EXPORT_SYMBOL(xfrm_find_acq);
987
988 #ifdef CONFIG_XFRM_SUB_POLICY
989 int
990 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
991                unsigned short family)
992 {
993         int err = 0;
994         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
995         if (!afinfo)
996                 return -EAFNOSUPPORT;
997
998         spin_lock_bh(&xfrm_state_lock);
999         if (afinfo->tmpl_sort)
1000                 err = afinfo->tmpl_sort(dst, src, n);
1001         spin_unlock_bh(&xfrm_state_lock);
1002         xfrm_state_put_afinfo(afinfo);
1003         return err;
1004 }
1005 EXPORT_SYMBOL(xfrm_tmpl_sort);
1006
1007 int
1008 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1009                 unsigned short family)
1010 {
1011         int err = 0;
1012         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1013         if (!afinfo)
1014                 return -EAFNOSUPPORT;
1015
1016         spin_lock_bh(&xfrm_state_lock);
1017         if (afinfo->state_sort)
1018                 err = afinfo->state_sort(dst, src, n);
1019         spin_unlock_bh(&xfrm_state_lock);
1020         xfrm_state_put_afinfo(afinfo);
1021         return err;
1022 }
1023 EXPORT_SYMBOL(xfrm_state_sort);
1024 #endif
1025
1026 /* Silly enough, but I'm too lazy to build a resolution list */
1027
1028 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1029 {
1030         int i;
1031
1032         for (i = 0; i <= xfrm_state_hmask; i++) {
1033                 struct hlist_node *entry;
1034                 struct xfrm_state *x;
1035
1036                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1037                         if (x->km.seq == seq &&
1038                             x->km.state == XFRM_STATE_ACQ) {
1039                                 xfrm_state_hold(x);
1040                                 return x;
1041                         }
1042                 }
1043         }
1044         return NULL;
1045 }
1046
1047 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1048 {
1049         struct xfrm_state *x;
1050
1051         spin_lock_bh(&xfrm_state_lock);
1052         x = __xfrm_find_acq_byseq(seq);
1053         spin_unlock_bh(&xfrm_state_lock);
1054         return x;
1055 }
1056 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1057
1058 u32 xfrm_get_acqseq(void)
1059 {
1060         u32 res;
1061         static u32 acqseq;
1062         static DEFINE_SPINLOCK(acqseq_lock);
1063
1064         spin_lock_bh(&acqseq_lock);
1065         res = (++acqseq ? : ++acqseq);
1066         spin_unlock_bh(&acqseq_lock);
1067         return res;
1068 }
1069 EXPORT_SYMBOL(xfrm_get_acqseq);
1070
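/* Pick an SPI in [minspi, maxspi] that does not collide with an existing
 * (daddr, proto) state and hash the state into the byspi table.  When
 * minspi == maxspi, that single value is used only if it is free.
 */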
1071 void
1072 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1073 {
1074         unsigned int h;
1075         struct xfrm_state *x0;
1076
1077         if (x->id.spi)
1078                 return;
1079
1080         if (minspi == maxspi) {
1081                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1082                 if (x0) {
1083                         xfrm_state_put(x0);
1084                         return;
1085                 }
1086                 x->id.spi = minspi;
1087         } else {
1088                 u32 spi = 0;
1089                 u32 low = ntohl(minspi);
1090                 u32 high = ntohl(maxspi);
1091                 for (h=0; h<high-low+1; h++) {
1092                         spi = low + net_random()%(high-low+1);
1093                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1094                         if (x0 == NULL) {
1095                                 x->id.spi = htonl(spi);
1096                                 break;
1097                         }
1098                         xfrm_state_put(x0);
1099                 }
1100         }
1101         if (x->id.spi) {
1102                 spin_lock_bh(&xfrm_state_lock);
1103                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1104                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1105                 spin_unlock_bh(&xfrm_state_lock);
1106                 wake_up(&km_waitq);
1107         }
1108 }
1109 EXPORT_SYMBOL(xfrm_alloc_spi);
1110
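/* Call 'func' for every state whose protocol matches 'proto', under
 * xfrm_state_lock.  Earlier entries are passed with the running count;
 * the final entry is passed with 0.  Returns -ENOENT if nothing matched.
 */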
1111 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1112                     void *data)
1113 {
1114         int i;
1115         struct xfrm_state *x, *last = NULL;
1116         struct hlist_node *entry;
1117         int count = 0;
1118         int err = 0;
1119
1120         spin_lock_bh(&xfrm_state_lock);
1121         for (i = 0; i <= xfrm_state_hmask; i++) {
1122                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1123                         if (!xfrm_id_proto_match(x->id.proto, proto))
1124                                 continue;
1125                         if (last) {
1126                                 err = func(last, count, data);
1127                                 if (err)
1128                                         goto out;
1129                         }
1130                         last = x;
1131                         count++;
1132                 }
1133         }
1134         if (count == 0) {
1135                 err = -ENOENT;
1136                 goto out;
1137         }
1138         err = func(last, 0, data);
1139 out:
1140         spin_unlock_bh(&xfrm_state_lock);
1141         return err;
1142 }
1143 EXPORT_SYMBOL(xfrm_state_walk);
1144
1145
1146 void xfrm_replay_notify(struct xfrm_state *x, int event)
1147 {
1148         struct km_event c;
1149         /* We send notify messages in two cases:
1150          *  1. we updated one of the sequence numbers, and the seqno difference
1151          *     is at least x->replay_maxdiff; in this case we also update the
1152          *     timeout of our timer function
1153          *  2. x->replay_maxage has elapsed since the last update,
1154          *     and there were changes
1155          *
1156          *  The state structure must be locked!
1157          */
1158
1159         switch (event) {
1160         case XFRM_REPLAY_UPDATE:
1161                 if (x->replay_maxdiff &&
1162                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1163                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1164                         if (x->xflags & XFRM_TIME_DEFER)
1165                                 event = XFRM_REPLAY_TIMEOUT;
1166                         else
1167                                 return;
1168                 }
1169
1170                 break;
1171
1172         case XFRM_REPLAY_TIMEOUT:
1173                 if ((x->replay.seq == x->preplay.seq) &&
1174                     (x->replay.bitmap == x->preplay.bitmap) &&
1175                     (x->replay.oseq == x->preplay.oseq)) {
1176                         x->xflags |= XFRM_TIME_DEFER;
1177                         return;
1178                 }
1179
1180                 break;
1181         }
1182
1183         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1184         c.event = XFRM_MSG_NEWAE;
1185         c.data.aevent = event;
1186         km_state_notify(x, &c);
1187
1188         if (x->replay_maxage &&
1189             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1190                 x->xflags &= ~XFRM_TIME_DEFER;
1191 }
1192 EXPORT_SYMBOL(xfrm_replay_notify);
1193
1194 static void xfrm_replay_timer_handler(unsigned long data)
1195 {
1196         struct xfrm_state *x = (struct xfrm_state*)data;
1197
1198         spin_lock(&x->lock);
1199
1200         if (x->km.state == XFRM_STATE_VALID) {
1201                 if (xfrm_aevent_is_on())
1202                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1203                 else
1204                         x->xflags |= XFRM_TIME_DEFER;
1205         }
1206
1207         spin_unlock(&x->lock);
1208 }
1209
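/* Anti-replay check for an inbound sequence number: reject zero, anything
 * that falls outside the replay window behind x->replay.seq, or a sequence
 * number whose bit is already set in the window bitmap; x->stats records
 * which check failed.
 */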
1210 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1211 {
1212         u32 diff;
1213         u32 seq = ntohl(net_seq);
1214
1215         if (unlikely(seq == 0))
1216                 return -EINVAL;
1217
1218         if (likely(seq > x->replay.seq))
1219                 return 0;
1220
1221         diff = x->replay.seq - seq;
1222         if (diff >= x->props.replay_window) {
1223                 x->stats.replay_window++;
1224                 return -EINVAL;
1225         }
1226
1227         if (x->replay.bitmap & (1U << diff)) {
1228                 x->stats.replay++;
1229                 return -EINVAL;
1230         }
1231         return 0;
1232 }
1233 EXPORT_SYMBOL(xfrm_replay_check);
1234
1235 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1236 {
1237         u32 diff;
1238         u32 seq = ntohl(net_seq);
1239
1240         if (seq > x->replay.seq) {
1241                 diff = seq - x->replay.seq;
1242                 if (diff < x->props.replay_window)
1243                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1244                 else
1245                         x->replay.bitmap = 1;
1246                 x->replay.seq = seq;
1247         } else {
1248                 diff = x->replay.seq - seq;
1249                 x->replay.bitmap |= (1U << diff);
1250         }
1251
1252         if (xfrm_aevent_is_on())
1253                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1254 }
1255 EXPORT_SYMBOL(xfrm_replay_advance);
1256
1257 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1258 static DEFINE_RWLOCK(xfrm_km_lock);
1259
1260 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1261 {
1262         struct xfrm_mgr *km;
1263
1264         read_lock(&xfrm_km_lock);
1265         list_for_each_entry(km, &xfrm_km_list, list)
1266                 if (km->notify_policy)
1267                         km->notify_policy(xp, dir, c);
1268         read_unlock(&xfrm_km_lock);
1269 }
1270
1271 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1272 {
1273         struct xfrm_mgr *km;
1274         read_lock(&xfrm_km_lock);
1275         list_for_each_entry(km, &xfrm_km_list, list)
1276                 if (km->notify)
1277                         km->notify(x, c);
1278         read_unlock(&xfrm_km_lock);
1279 }
1280
1281 EXPORT_SYMBOL(km_policy_notify);
1282 EXPORT_SYMBOL(km_state_notify);
1283
1284 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1285 {
1286         struct km_event c;
1287
1288         c.data.hard = hard;
1289         c.pid = pid;
1290         c.event = XFRM_MSG_EXPIRE;
1291         km_state_notify(x, &c);
1292
1293         if (hard)
1294                 wake_up(&km_waitq);
1295 }
1296
1297 EXPORT_SYMBOL(km_state_expired);
1298 /*
1299  * We send to all registered managers regardless of failure;
1300  * we are happy with one success.
1301  */
1302 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1303 {
1304         int err = -EINVAL, acqret;
1305         struct xfrm_mgr *km;
1306
1307         read_lock(&xfrm_km_lock);
1308         list_for_each_entry(km, &xfrm_km_list, list) {
1309                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1310                 if (!acqret)
1311                         err = acqret;
1312         }
1313         read_unlock(&xfrm_km_lock);
1314         return err;
1315 }
1316 EXPORT_SYMBOL(km_query);
1317
1318 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1319 {
1320         int err = -EINVAL;
1321         struct xfrm_mgr *km;
1322
1323         read_lock(&xfrm_km_lock);
1324         list_for_each_entry(km, &xfrm_km_list, list) {
1325                 if (km->new_mapping)
1326                         err = km->new_mapping(x, ipaddr, sport);
1327                 if (!err)
1328                         break;
1329         }
1330         read_unlock(&xfrm_km_lock);
1331         return err;
1332 }
1333 EXPORT_SYMBOL(km_new_mapping);
1334
1335 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1336 {
1337         struct km_event c;
1338
1339         c.data.hard = hard;
1340         c.pid = pid;
1341         c.event = XFRM_MSG_POLEXPIRE;
1342         km_policy_notify(pol, dir, &c);
1343
1344         if (hard)
1345                 wake_up(&km_waitq);
1346 }
1347 EXPORT_SYMBOL(km_policy_expired);
1348
1349 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1350 {
1351         int err = -EINVAL;
1352         int ret;
1353         struct xfrm_mgr *km;
1354
1355         read_lock(&xfrm_km_lock);
1356         list_for_each_entry(km, &xfrm_km_list, list) {
1357                 if (km->report) {
1358                         ret = km->report(proto, sel, addr);
1359                         if (!ret)
1360                                 err = ret;
1361                 }
1362         }
1363         read_unlock(&xfrm_km_lock);
1364         return err;
1365 }
1366 EXPORT_SYMBOL(km_report);
1367
1368 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1369 {
1370         int err;
1371         u8 *data;
1372         struct xfrm_mgr *km;
1373         struct xfrm_policy *pol = NULL;
1374
1375         if (optlen <= 0 || optlen > PAGE_SIZE)
1376                 return -EMSGSIZE;
1377
1378         data = kmalloc(optlen, GFP_KERNEL);
1379         if (!data)
1380                 return -ENOMEM;
1381
1382         err = -EFAULT;
1383         if (copy_from_user(data, optval, optlen))
1384                 goto out;
1385
1386         err = -EINVAL;
1387         read_lock(&xfrm_km_lock);
1388         list_for_each_entry(km, &xfrm_km_list, list) {
1389                 pol = km->compile_policy(sk, optname, data,
1390                                          optlen, &err);
1391                 if (err >= 0)
1392                         break;
1393         }
1394         read_unlock(&xfrm_km_lock);
1395
1396         if (err >= 0) {
1397                 xfrm_sk_policy_insert(sk, err, pol);
1398                 xfrm_pol_put(pol);
1399                 err = 0;
1400         }
1401
1402 out:
1403         kfree(data);
1404         return err;
1405 }
1406 EXPORT_SYMBOL(xfrm_user_policy);
1407
1408 int xfrm_register_km(struct xfrm_mgr *km)
1409 {
1410         write_lock_bh(&xfrm_km_lock);
1411         list_add_tail(&km->list, &xfrm_km_list);
1412         write_unlock_bh(&xfrm_km_lock);
1413         return 0;
1414 }
1415 EXPORT_SYMBOL(xfrm_register_km);
1416
1417 int xfrm_unregister_km(struct xfrm_mgr *km)
1418 {
1419         write_lock_bh(&xfrm_km_lock);
1420         list_del(&km->list);
1421         write_unlock_bh(&xfrm_km_lock);
1422         return 0;
1423 }
1424 EXPORT_SYMBOL(xfrm_unregister_km);
1425
1426 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1427 {
1428         int err = 0;
1429         if (unlikely(afinfo == NULL))
1430                 return -EINVAL;
1431         if (unlikely(afinfo->family >= NPROTO))
1432                 return -EAFNOSUPPORT;
1433         write_lock_bh(&xfrm_state_afinfo_lock);
1434         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1435                 err = -ENOBUFS;
1436         else
1437                 xfrm_state_afinfo[afinfo->family] = afinfo;
1438         write_unlock_bh(&xfrm_state_afinfo_lock);
1439         return err;
1440 }
1441 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1442
1443 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1444 {
1445         int err = 0;
1446         if (unlikely(afinfo == NULL))
1447                 return -EINVAL;
1448         if (unlikely(afinfo->family >= NPROTO))
1449                 return -EAFNOSUPPORT;
1450         write_lock_bh(&xfrm_state_afinfo_lock);
1451         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1452                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1453                         err = -EINVAL;
1454                 else
1455                         xfrm_state_afinfo[afinfo->family] = NULL;
1456         }
1457         write_unlock_bh(&xfrm_state_afinfo_lock);
1458         return err;
1459 }
1460 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1461
1462 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1463 {
1464         struct xfrm_state_afinfo *afinfo;
1465         if (unlikely(family >= NPROTO))
1466                 return NULL;
1467         read_lock(&xfrm_state_afinfo_lock);
1468         afinfo = xfrm_state_afinfo[family];
1469         if (unlikely(!afinfo))
1470                 read_unlock(&xfrm_state_afinfo_lock);
1471         return afinfo;
1472 }
1473
1474 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1475 {
1476         read_unlock(&xfrm_state_afinfo_lock);
1477 }
1478
1479 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1480 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1481 {
1482         if (x->tunnel) {
1483                 struct xfrm_state *t = x->tunnel;
1484
1485                 if (atomic_read(&t->tunnel_users) == 2)
1486                         xfrm_state_delete(t);
1487                 atomic_dec(&t->tunnel_users);
1488                 xfrm_state_put(t);
1489                 x->tunnel = NULL;
1490         }
1491 }
1492 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1493
1494 /*
1495  * This function is NOT optimal.  For example, with ESP it will give an
1496  * MTU that's usually two bytes short of being optimal.  However, it will
1497  * usually give an answer that's a multiple of 4 provided the input is
1498  * also a multiple of 4.
1499  */
1500 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1501 {
1502         int res = mtu;
1503
1504         res -= x->props.header_len;
1505
1506         for (;;) {
1507                 int m = res;
1508
1509                 if (m < 68)
1510                         return 68;
1511
1512                 spin_lock_bh(&x->lock);
1513                 if (x->km.state == XFRM_STATE_VALID &&
1514                     x->type && x->type->get_max_size)
1515                         m = x->type->get_max_size(x, m);
1516                 else
1517                         m += x->props.header_len;
1518                 spin_unlock_bh(&x->lock);
1519
1520                 if (m <= mtu)
1521                         break;
1522                 res -= (m - mtu);
1523         }
1524
1525         return res;
1526 }
1527
1528 int xfrm_init_state(struct xfrm_state *x)
1529 {
1530         struct xfrm_state_afinfo *afinfo;
1531         int family = x->props.family;
1532         int err;
1533
1534         err = -EAFNOSUPPORT;
1535         afinfo = xfrm_state_get_afinfo(family);
1536         if (!afinfo)
1537                 goto error;
1538
1539         err = 0;
1540         if (afinfo->init_flags)
1541                 err = afinfo->init_flags(x);
1542
1543         xfrm_state_put_afinfo(afinfo);
1544
1545         if (err)
1546                 goto error;
1547
1548         err = -EPROTONOSUPPORT;
1549         x->type = xfrm_get_type(x->id.proto, family);
1550         if (x->type == NULL)
1551                 goto error;
1552
1553         err = x->type->init_state(x);
1554         if (err)
1555                 goto error;
1556
1557         x->mode = xfrm_get_mode(x->props.mode, family);
1558         if (x->mode == NULL)
1559                 goto error;
1560
1561         x->km.state = XFRM_STATE_VALID;
1562
1563 error:
1564         return err;
1565 }
1566
1567 EXPORT_SYMBOL(xfrm_init_state);
1568  
1569 void __init xfrm_state_init(void)
1570 {
1571         unsigned int sz;
1572
1573         sz = sizeof(struct hlist_head) * 8;
1574
1575         xfrm_state_bydst = xfrm_hash_alloc(sz);
1576         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1577         xfrm_state_byspi = xfrm_hash_alloc(sz);
1578         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1579                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1580         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1581
1582         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1583 }
1584