[XFRM] STATE: Use destination address for src hash.
safe/jmp/linux-2.6: net/xfrm/xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23
24 #include "xfrm_hash.h"
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to three hash tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,saddr,family,reqid) to find what SAs exist
39       for a given destination/tunnel endpoint. (output)
40    3. Hash table by (daddr,saddr,family) to look up SAs by address pair. */
41
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find the appropriate SA towards a given target (endpoint
45  * of a tunnel or destination of transport mode) allowed by the selector.
46  *
47  * Its main use is finding an SA after policy has selected a tunnel or
48  * transport mode.  It can also be used by the ah/esp ICMP error handlers
49  * to find the offending SA. */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
57
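/* Hash helpers: thin wrappers around the __xfrm_*_hash() functions from
 * xfrm_hash.h that fold in the current table mask.  Note that the source
 * hash is keyed on the destination address as well as the source address.
 */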
58 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59                                          xfrm_address_t *saddr,
60                                          u32 reqid,
61                                          unsigned short family)
62 {
63         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64 }
65
66 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67                                          xfrm_address_t *saddr,
68                                          unsigned short family)
69 {
70         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
71 }
72
73 static inline unsigned int
74 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *entry, *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head(&x->bydst, ndsttable+h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head(&x->bysrc, nsrctable+h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head(&x->byspi, nspitable+h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(void)
111 {
112         return ((xfrm_state_hmask + 1) << 1) *
113                 sizeof(struct hlist_head);
114 }
115
116 static DEFINE_MUTEX(hash_resize_mutex);
117
118 static void xfrm_hash_resize(void *__unused)
119 {
120         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
121         unsigned long nsize, osize;
122         unsigned int nhashmask, ohashmask;
123         int i;
124
125         mutex_lock(&hash_resize_mutex);
126
127         nsize = xfrm_hash_new_size();
128         ndst = xfrm_hash_alloc(nsize);
129         if (!ndst)
130                 goto out_unlock;
131         nsrc = xfrm_hash_alloc(nsize);
132         if (!nsrc) {
133                 xfrm_hash_free(ndst, nsize);
134                 goto out_unlock;
135         }
136         nspi = xfrm_hash_alloc(nsize);
137         if (!nspi) {
138                 xfrm_hash_free(ndst, nsize);
139                 xfrm_hash_free(nsrc, nsize);
140                 goto out_unlock;
141         }
142
143         spin_lock_bh(&xfrm_state_lock);
144
145         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
146         for (i = xfrm_state_hmask; i >= 0; i--)
147                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
148                                    nhashmask);
149
150         odst = xfrm_state_bydst;
151         osrc = xfrm_state_bysrc;
152         ospi = xfrm_state_byspi;
153         ohashmask = xfrm_state_hmask;
154
155         xfrm_state_bydst = ndst;
156         xfrm_state_bysrc = nsrc;
157         xfrm_state_byspi = nspi;
158         xfrm_state_hmask = nhashmask;
159
160         spin_unlock_bh(&xfrm_state_lock);
161
162         osize = (ohashmask + 1) * sizeof(struct hlist_head);
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166
167 out_unlock:
168         mutex_unlock(&hash_resize_mutex);
169 }
170
171 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
172
173 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
174 EXPORT_SYMBOL(km_waitq);
175
176 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
177 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
178
179 static struct work_struct xfrm_state_gc_work;
180 static HLIST_HEAD(xfrm_state_gc_list);
181 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
182
183 int __xfrm_state_delete(struct xfrm_state *x);
184
185 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
186 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
187
188 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
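/* Final teardown of a dead state: stop its timers, free the per-state
 * allocations (algorithms, encapsulation, care-of address), drop the
 * mode/type references and the security context, then free the state.
 */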
191 static void xfrm_state_gc_destroy(struct xfrm_state *x)
192 {
193         del_timer_sync(&x->timer);
194         del_timer_sync(&x->rtimer);
195         kfree(x->aalg);
196         kfree(x->ealg);
197         kfree(x->calg);
198         kfree(x->encap);
199         kfree(x->coaddr);
200         if (x->mode)
201                 xfrm_put_mode(x->mode);
202         if (x->type) {
203                 x->type->destructor(x);
204                 xfrm_put_type(x->type);
205         }
206         security_xfrm_state_free(x);
207         kfree(x);
208 }
209
210 static void xfrm_state_gc_task(void *data)
211 {
212         struct xfrm_state *x;
213         struct hlist_node *entry, *tmp;
214         struct hlist_head gc_list;
215
216         spin_lock_bh(&xfrm_state_gc_lock);
217         gc_list.first = xfrm_state_gc_list.first;
218         INIT_HLIST_HEAD(&xfrm_state_gc_list);
219         spin_unlock_bh(&xfrm_state_gc_lock);
220
221         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
222                 xfrm_state_gc_destroy(x);
223
224         wake_up(&km_waitq);
225 }
226
227 static inline unsigned long make_jiffies(long secs)
228 {
229         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230                 return MAX_SCHEDULE_TIMEOUT-1;
231         else
232                 return secs*HZ;
233 }
234
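/* Per-state lifetime timer.  Checks the hard and soft add/use time limits
 * against wall-clock time, notifies the key manager on soft expiry
 * (km.dying), deletes the state (or marks an SPI-less ACQ state EXPIRED)
 * on hard expiry, and re-arms itself for the nearest remaining deadline.
 */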
235 static void xfrm_timer_handler(unsigned long data)
236 {
237         struct xfrm_state *x = (struct xfrm_state*)data;
238         unsigned long now = (unsigned long)xtime.tv_sec;
239         long next = LONG_MAX;
240         int warn = 0;
241
242         spin_lock(&x->lock);
243         if (x->km.state == XFRM_STATE_DEAD)
244                 goto out;
245         if (x->km.state == XFRM_STATE_EXPIRED)
246                 goto expired;
247         if (x->lft.hard_add_expires_seconds) {
248                 long tmo = x->lft.hard_add_expires_seconds +
249                         x->curlft.add_time - now;
250                 if (tmo <= 0)
251                         goto expired;
252                 if (tmo < next)
253                         next = tmo;
254         }
255         if (x->lft.hard_use_expires_seconds) {
256                 long tmo = x->lft.hard_use_expires_seconds +
257                         (x->curlft.use_time ? : now) - now;
258                 if (tmo <= 0)
259                         goto expired;
260                 if (tmo < next)
261                         next = tmo;
262         }
263         if (x->km.dying)
264                 goto resched;
265         if (x->lft.soft_add_expires_seconds) {
266                 long tmo = x->lft.soft_add_expires_seconds +
267                         x->curlft.add_time - now;
268                 if (tmo <= 0)
269                         warn = 1;
270                 else if (tmo < next)
271                         next = tmo;
272         }
273         if (x->lft.soft_use_expires_seconds) {
274                 long tmo = x->lft.soft_use_expires_seconds +
275                         (x->curlft.use_time ? : now) - now;
276                 if (tmo <= 0)
277                         warn = 1;
278                 else if (tmo < next)
279                         next = tmo;
280         }
281
282         x->km.dying = warn;
283         if (warn)
284                 km_state_expired(x, 0, 0);
285 resched:
286         if (next != LONG_MAX)
287                 mod_timer(&x->timer, jiffies + make_jiffies(next));
288
289         goto out;
290
291 expired:
292         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
293                 x->km.state = XFRM_STATE_EXPIRED;
294                 wake_up(&km_waitq);
295                 next = 2;
296                 goto resched;
297         }
298         if (!__xfrm_state_delete(x) && x->id.spi)
299                 km_state_expired(x, 1, 0);
300
301 out:
302         spin_unlock(&x->lock);
303 }
304
305 static void xfrm_replay_timer_handler(unsigned long data);
306
307 struct xfrm_state *xfrm_state_alloc(void)
308 {
309         struct xfrm_state *x;
310
311         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
312
313         if (x) {
314                 atomic_set(&x->refcnt, 1);
315                 atomic_set(&x->tunnel_users, 0);
316                 INIT_HLIST_NODE(&x->bydst);
317                 INIT_HLIST_NODE(&x->bysrc);
318                 INIT_HLIST_NODE(&x->byspi);
319                 init_timer(&x->timer);
320                 x->timer.function = xfrm_timer_handler;
321                 x->timer.data     = (unsigned long)x;
322                 init_timer(&x->rtimer);
323                 x->rtimer.function = xfrm_replay_timer_handler;
324                 x->rtimer.data     = (unsigned long)x;
325                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
326                 x->lft.soft_byte_limit = XFRM_INF;
327                 x->lft.soft_packet_limit = XFRM_INF;
328                 x->lft.hard_byte_limit = XFRM_INF;
329                 x->lft.hard_packet_limit = XFRM_INF;
330                 x->replay_maxage = 0;
331                 x->replay_maxdiff = 0;
332                 spin_lock_init(&x->lock);
333         }
334         return x;
335 }
336 EXPORT_SYMBOL(xfrm_state_alloc);
337
338 void __xfrm_state_destroy(struct xfrm_state *x)
339 {
340         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
341
342         spin_lock_bh(&xfrm_state_gc_lock);
343         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
344         spin_unlock_bh(&xfrm_state_gc_lock);
345         schedule_work(&xfrm_state_gc_work);
346 }
347 EXPORT_SYMBOL(__xfrm_state_destroy);
348
349 int __xfrm_state_delete(struct xfrm_state *x)
350 {
351         int err = -ESRCH;
352
353         if (x->km.state != XFRM_STATE_DEAD) {
354                 x->km.state = XFRM_STATE_DEAD;
355                 spin_lock(&xfrm_state_lock);
356                 hlist_del(&x->bydst);
357                 hlist_del(&x->bysrc);
358                 if (x->id.spi)
359                         hlist_del(&x->byspi);
360                 xfrm_state_num--;
361                 spin_unlock(&xfrm_state_lock);
362
363                 /* All xfrm_state objects are created by xfrm_state_alloc.
364                  * The xfrm_state_alloc call gives a reference, and that
365                  * is what we are dropping here.
366                  */
367                 __xfrm_state_put(x);
368                 err = 0;
369         }
370
371         return err;
372 }
373 EXPORT_SYMBOL(__xfrm_state_delete);
374
375 int xfrm_state_delete(struct xfrm_state *x)
376 {
377         int err;
378
379         spin_lock_bh(&x->lock);
380         err = __xfrm_state_delete(x);
381         spin_unlock_bh(&x->lock);
382
383         return err;
384 }
385 EXPORT_SYMBOL(xfrm_state_delete);
386
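/* Delete every state that is not held by a kernel tunnel and whose
 * protocol matches @proto.  The lock is dropped around each deletion,
 * so the bucket scan restarts after every removed entry.
 */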
387 void xfrm_state_flush(u8 proto)
388 {
389         int i;
390
391         spin_lock_bh(&xfrm_state_lock);
392         for (i = 0; i <= xfrm_state_hmask; i++) {
393                 struct hlist_node *entry;
394                 struct xfrm_state *x;
395 restart:
396                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
397                         if (!xfrm_state_kern(x) &&
398                             xfrm_id_proto_match(x->id.proto, proto)) {
399                                 xfrm_state_hold(x);
400                                 spin_unlock_bh(&xfrm_state_lock);
401
402                                 xfrm_state_delete(x);
403                                 xfrm_state_put(x);
404
405                                 spin_lock_bh(&xfrm_state_lock);
406                                 goto restart;
407                         }
408                 }
409         }
410         spin_unlock_bh(&xfrm_state_lock);
411         wake_up(&km_waitq);
412 }
413 EXPORT_SYMBOL(xfrm_state_flush);
414
415 static int
416 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
417                   struct xfrm_tmpl *tmpl,
418                   xfrm_address_t *daddr, xfrm_address_t *saddr,
419                   unsigned short family)
420 {
421         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
422         if (!afinfo)
423                 return -1;
424         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
425         xfrm_state_put_afinfo(afinfo);
426         return 0;
427 }
428
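/* Input-path lookup by (daddr, SPI, proto) in the byspi hash.  Returns a
 * held reference or NULL; the caller must hold xfrm_state_lock.
 */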
429 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
430 {
431         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
432         struct xfrm_state *x;
433         struct hlist_node *entry;
434
435         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
436                 if (x->props.family != family ||
437                     x->id.spi       != spi ||
438                     x->id.proto     != proto)
439                         continue;
440
441                 switch (family) {
442                 case AF_INET:
443                         if (x->id.daddr.a4 != daddr->a4)
444                                 continue;
445                         break;
446                 case AF_INET6:
447                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
448                                              (struct in6_addr *)
449                                              x->id.daddr.a6))
450                                 continue;
451                         break;
452                 }
453
454                 xfrm_state_hold(x);
455                 return x;
456         }
457
458         return NULL;
459 }
460
461 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
462 {
463         unsigned int h = xfrm_src_hash(daddr, saddr, family);
464         struct xfrm_state *x;
465         struct hlist_node *entry;
466
467         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
468                 if (x->props.family != family ||
469                     x->id.proto     != proto)
470                         continue;
471
472                 switch (family) {
473                 case AF_INET:
474                         if (x->id.daddr.a4 != daddr->a4 ||
475                             x->props.saddr.a4 != saddr->a4)
476                                 continue;
477                         break;
478                 case AF_INET6:
479                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
480                                              (struct in6_addr *)
481                                              x->id.daddr.a6) ||
482                             !ipv6_addr_equal((struct in6_addr *)saddr,
483                                              (struct in6_addr *)
484                                              x->props.saddr.a6))
485                                 continue;
486                         break;
487                 }
488
489                 xfrm_state_hold(x);
490                 return x;
491         }
492
493         return NULL;
494 }
495
496 static inline struct xfrm_state *
497 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
498 {
499         if (use_spi)
500                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
501                                            x->id.proto, family);
502         else
503                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
504                                                   &x->props.saddr,
505                                                   x->id.proto, family);
506 }
507
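/* Output-path SA lookup.  Walk the bydst chain for (daddr, saddr, reqid)
 * looking for a VALID state that matches the template and flow.  If none
 * is found and no acquire is already in progress, allocate a temporary
 * ACQ state and ask the registered key managers (km_query) to negotiate
 * a real one.
 */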
508 struct xfrm_state *
509 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
510                 struct flowi *fl, struct xfrm_tmpl *tmpl,
511                 struct xfrm_policy *pol, int *err,
512                 unsigned short family)
513 {
514         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
515         struct hlist_node *entry;
516         struct xfrm_state *x, *x0;
517         int acquire_in_progress = 0;
518         int error = 0;
519         struct xfrm_state *best = NULL;
520         
521         spin_lock_bh(&xfrm_state_lock);
522         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
523                 if (x->props.family == family &&
524                     x->props.reqid == tmpl->reqid &&
525                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
526                     xfrm_state_addr_check(x, daddr, saddr, family) &&
527                     tmpl->mode == x->props.mode &&
528                     tmpl->id.proto == x->id.proto &&
529                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
530                         /* Resolution logic:
531                            1. There is a valid state with a matching selector.
532                               Done.
533                            2. Valid state with an inappropriate selector. Skip.
534
535                            Entering area of "sysdeps".
536
537                            3. If the state is not valid, its selector is
538                               temporary and matches only the session which
539                               triggered the previous resolution.  The key
540                               manager will install a state with a proper
541                               selector.
542                          */
543                         if (x->km.state == XFRM_STATE_VALID) {
544                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
545                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
546                                         continue;
547                                 if (!best ||
548                                     best->km.dying > x->km.dying ||
549                                     (best->km.dying == x->km.dying &&
550                                      best->curlft.add_time < x->curlft.add_time))
551                                         best = x;
552                         } else if (x->km.state == XFRM_STATE_ACQ) {
553                                 acquire_in_progress = 1;
554                         } else if (x->km.state == XFRM_STATE_ERROR ||
555                                    x->km.state == XFRM_STATE_EXPIRED) {
556                                 if (xfrm_selector_match(&x->sel, fl, family) &&
557                                     security_xfrm_state_pol_flow_match(x, pol, fl))
558                                         error = -ESRCH;
559                         }
560                 }
561         }
562
563         x = best;
564         if (!x && !error && !acquire_in_progress) {
565                 if (tmpl->id.spi &&
566                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
567                                               tmpl->id.proto, family)) != NULL) {
568                         xfrm_state_put(x0);
569                         error = -EEXIST;
570                         goto out;
571                 }
572                 x = xfrm_state_alloc();
573                 if (x == NULL) {
574                         error = -ENOMEM;
575                         goto out;
576                 }
577                 /* Initialize temporary selector matching only
578                  * to current session. */
579                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
580
581                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
582                 if (error) {
583                         x->km.state = XFRM_STATE_DEAD;
584                         xfrm_state_put(x);
585                         x = NULL;
586                         goto out;
587                 }
588
589                 if (km_query(x, tmpl, pol) == 0) {
590                         x->km.state = XFRM_STATE_ACQ;
591                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
592                         h = xfrm_src_hash(daddr, saddr, family);
593                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
594                         if (x->id.spi) {
595                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
596                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
597                         }
598                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
599                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
600                         add_timer(&x->timer);
601                 } else {
602                         x->km.state = XFRM_STATE_DEAD;
603                         xfrm_state_put(x);
604                         x = NULL;
605                         error = -ESRCH;
606                 }
607         }
608 out:
609         if (x)
610                 xfrm_state_hold(x);
611         else
612                 *err = acquire_in_progress ? -EAGAIN : error;
613         spin_unlock_bh(&xfrm_state_lock);
614         return x;
615 }
616
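/* Link a state into the bydst, bysrc and (if it has an SPI) byspi hash
 * tables, arm its timers and bump xfrm_state_num.  Schedules a hash grow
 * once the number of states exceeds the number of buckets.  Caller must
 * hold xfrm_state_lock.
 */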
617 static void __xfrm_state_insert(struct xfrm_state *x)
618 {
619         unsigned int h;
620
621         x->genid = ++xfrm_state_genid;
622
623         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
624                           x->props.reqid, x->props.family);
625         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
626
627         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
628         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
629
630         if (x->id.spi) {
631                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
632                                   x->props.family);
633
634                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
635         }
636
637         mod_timer(&x->timer, jiffies + HZ);
638         if (x->replay_maxage)
639                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
640
641         wake_up(&km_waitq);
642
643         xfrm_state_num++;
644
645         if (x->bydst.next != NULL &&
646             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
647             xfrm_state_num > xfrm_state_hmask)
648                 schedule_work(&xfrm_hash_work);
649 }
650
651 /* xfrm_state_lock is held */
652 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
653 {
654         unsigned short family = xnew->props.family;
655         u32 reqid = xnew->props.reqid;
656         struct xfrm_state *x;
657         struct hlist_node *entry;
658         unsigned int h;
659
660         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
661         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
662                 if (x->props.family     == family &&
663                     x->props.reqid      == reqid &&
664                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
665                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
666                         x->genid = xfrm_state_genid;
667         }
668 }
669
670 void xfrm_state_insert(struct xfrm_state *x)
671 {
672         spin_lock_bh(&xfrm_state_lock);
673         __xfrm_state_bump_genids(x);
674         __xfrm_state_insert(x);
675         spin_unlock_bh(&xfrm_state_lock);
676 }
677 EXPORT_SYMBOL(xfrm_state_insert);
678
679 /* xfrm_state_lock is held */
680 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
681 {
682         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
683         struct hlist_node *entry;
684         struct xfrm_state *x;
685
686         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
687                 if (x->props.reqid  != reqid ||
688                     x->props.mode   != mode ||
689                     x->props.family != family ||
690                     x->km.state     != XFRM_STATE_ACQ ||
691                     x->id.spi       != 0)
692                         continue;
693
694                 switch (family) {
695                 case AF_INET:
696                         if (x->id.daddr.a4    != daddr->a4 ||
697                             x->props.saddr.a4 != saddr->a4)
698                                 continue;
699                         break;
700                 case AF_INET6:
701                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
702                                              (struct in6_addr *)daddr) ||
703                             !ipv6_addr_equal((struct in6_addr *)
704                                              x->props.saddr.a6,
705                                              (struct in6_addr *)saddr))
706                                 continue;
707                         break;
708                 }
709
710                 xfrm_state_hold(x);
711                 return x;
712         }
713
714         if (!create)
715                 return NULL;
716
717         x = xfrm_state_alloc();
718         if (likely(x)) {
719                 switch (family) {
720                 case AF_INET:
721                         x->sel.daddr.a4 = daddr->a4;
722                         x->sel.saddr.a4 = saddr->a4;
723                         x->sel.prefixlen_d = 32;
724                         x->sel.prefixlen_s = 32;
725                         x->props.saddr.a4 = saddr->a4;
726                         x->id.daddr.a4 = daddr->a4;
727                         break;
728
729                 case AF_INET6:
730                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
731                                        (struct in6_addr *)daddr);
732                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
733                                        (struct in6_addr *)saddr);
734                         x->sel.prefixlen_d = 128;
735                         x->sel.prefixlen_s = 128;
736                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
737                                        (struct in6_addr *)saddr);
738                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
739                                        (struct in6_addr *)daddr);
740                         break;
741                 }
742
743                 x->km.state = XFRM_STATE_ACQ;
744                 x->id.proto = proto;
745                 x->props.family = family;
746                 x->props.mode = mode;
747                 x->props.reqid = reqid;
748                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
749                 xfrm_state_hold(x);
750                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
751                 add_timer(&x->timer);
752                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
753                 h = xfrm_src_hash(daddr, saddr, family);
754                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
755                 wake_up(&km_waitq);
756         }
757
758         return x;
759 }
760
761 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
762
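/* Insert a fully constructed state.  Fails with -EEXIST if an equivalent
 * state already exists; otherwise any matching ACQ placeholder (found by
 * sequence number or by address/reqid) is deleted once the new state has
 * been linked in.
 */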
763 int xfrm_state_add(struct xfrm_state *x)
764 {
765         struct xfrm_state *x1;
766         int family;
767         int err;
768         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
769
770         family = x->props.family;
771
772         spin_lock_bh(&xfrm_state_lock);
773
774         x1 = __xfrm_state_locate(x, use_spi, family);
775         if (x1) {
776                 xfrm_state_put(x1);
777                 x1 = NULL;
778                 err = -EEXIST;
779                 goto out;
780         }
781
782         if (use_spi && x->km.seq) {
783                 x1 = __xfrm_find_acq_byseq(x->km.seq);
784                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
785                         xfrm_state_put(x1);
786                         x1 = NULL;
787                 }
788         }
789
790         if (use_spi && !x1)
791                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
792                                      x->id.proto,
793                                      &x->id.daddr, &x->props.saddr, 0);
794
795         __xfrm_state_bump_genids(x);
796         __xfrm_state_insert(x);
797         err = 0;
798
799 out:
800         spin_unlock_bh(&xfrm_state_lock);
801
802         if (x1) {
803                 xfrm_state_delete(x1);
804                 xfrm_state_put(x1);
805         }
806
807         return err;
808 }
809 EXPORT_SYMBOL(xfrm_state_add);
810
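/* Update an existing state in place.  If the match is an ACQ placeholder
 * the new state simply replaces it; otherwise the encapsulation, care-of
 * address, selector and lifetimes of the existing VALID state are
 * refreshed from @x.
 */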
811 int xfrm_state_update(struct xfrm_state *x)
812 {
813         struct xfrm_state *x1;
814         int err;
815         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
816
817         spin_lock_bh(&xfrm_state_lock);
818         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
819
820         err = -ESRCH;
821         if (!x1)
822                 goto out;
823
824         if (xfrm_state_kern(x1)) {
825                 xfrm_state_put(x1);
826                 err = -EEXIST;
827                 goto out;
828         }
829
830         if (x1->km.state == XFRM_STATE_ACQ) {
831                 __xfrm_state_insert(x);
832                 x = NULL;
833         }
834         err = 0;
835
836 out:
837         spin_unlock_bh(&xfrm_state_lock);
838
839         if (err)
840                 return err;
841
842         if (!x) {
843                 xfrm_state_delete(x1);
844                 xfrm_state_put(x1);
845                 return 0;
846         }
847
848         err = -EINVAL;
849         spin_lock_bh(&x1->lock);
850         if (likely(x1->km.state == XFRM_STATE_VALID)) {
851                 if (x->encap && x1->encap)
852                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
853                 if (x->coaddr && x1->coaddr) {
854                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
855                 }
856                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
857                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
858                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
859                 x1->km.dying = 0;
860
861                 mod_timer(&x1->timer, jiffies + HZ);
862                 if (x1->curlft.use_time)
863                         xfrm_state_check_expire(x1);
864
865                 err = 0;
866         }
867         spin_unlock_bh(&x1->lock);
868
869         xfrm_state_put(x1);
870
871         return err;
872 }
873 EXPORT_SYMBOL(xfrm_state_update);
874
875 int xfrm_state_check_expire(struct xfrm_state *x)
876 {
877         if (!x->curlft.use_time)
878                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
879
880         if (x->km.state != XFRM_STATE_VALID)
881                 return -EINVAL;
882
883         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
884             x->curlft.packets >= x->lft.hard_packet_limit) {
885                 x->km.state = XFRM_STATE_EXPIRED;
886                 mod_timer(&x->timer, jiffies);
887                 return -EINVAL;
888         }
889
890         if (!x->km.dying &&
891             (x->curlft.bytes >= x->lft.soft_byte_limit ||
892              x->curlft.packets >= x->lft.soft_packet_limit)) {
893                 x->km.dying = 1;
894                 km_state_expired(x, 0, 0);
895         }
896         return 0;
897 }
898 EXPORT_SYMBOL(xfrm_state_check_expire);
899
900 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
901 {
902         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
903                 - skb_headroom(skb);
904
905         if (nhead > 0)
906                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
907
908         /* Check tail too... */
909         return 0;
910 }
911
912 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
913 {
914         int err = xfrm_state_check_expire(x);
915         if (err < 0)
916                 goto err;
917         err = xfrm_state_check_space(x, skb);
918 err:
919         return err;
920 }
921 EXPORT_SYMBOL(xfrm_state_check);
922
923 struct xfrm_state *
924 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
925                   unsigned short family)
926 {
927         struct xfrm_state *x;
928
929         spin_lock_bh(&xfrm_state_lock);
930         x = __xfrm_state_lookup(daddr, spi, proto, family);
931         spin_unlock_bh(&xfrm_state_lock);
932         return x;
933 }
934 EXPORT_SYMBOL(xfrm_state_lookup);
935
936 struct xfrm_state *
937 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
938                          u8 proto, unsigned short family)
939 {
940         struct xfrm_state *x;
941
942         spin_lock_bh(&xfrm_state_lock);
943         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
944         spin_unlock_bh(&xfrm_state_lock);
945         return x;
946 }
947 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
948
949 struct xfrm_state *
950 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
951               xfrm_address_t *daddr, xfrm_address_t *saddr, 
952               int create, unsigned short family)
953 {
954         struct xfrm_state *x;
955
956         spin_lock_bh(&xfrm_state_lock);
957         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
958         spin_unlock_bh(&xfrm_state_lock);
959
960         return x;
961 }
962 EXPORT_SYMBOL(xfrm_find_acq);
963
964 #ifdef CONFIG_XFRM_SUB_POLICY
965 int
966 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
967                unsigned short family)
968 {
969         int err = 0;
970         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
971         if (!afinfo)
972                 return -EAFNOSUPPORT;
973
974         spin_lock_bh(&xfrm_state_lock);
975         if (afinfo->tmpl_sort)
976                 err = afinfo->tmpl_sort(dst, src, n);
977         spin_unlock_bh(&xfrm_state_lock);
978         xfrm_state_put_afinfo(afinfo);
979         return err;
980 }
981 EXPORT_SYMBOL(xfrm_tmpl_sort);
982
983 int
984 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
985                 unsigned short family)
986 {
987         int err = 0;
988         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
989         if (!afinfo)
990                 return -EAFNOSUPPORT;
991
992         spin_lock_bh(&xfrm_state_lock);
993         if (afinfo->state_sort)
994                 err = afinfo->state_sort(dst, src, n);
995         spin_unlock_bh(&xfrm_state_lock);
996         xfrm_state_put_afinfo(afinfo);
997         return err;
998 }
999 EXPORT_SYMBOL(xfrm_state_sort);
1000 #endif
1001
1002 /* Silly enough, but I'm too lazy to build a resolution list. */
1003
1004 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1005 {
1006         int i;
1007
1008         for (i = 0; i <= xfrm_state_hmask; i++) {
1009                 struct hlist_node *entry;
1010                 struct xfrm_state *x;
1011
1012                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1013                         if (x->km.seq == seq &&
1014                             x->km.state == XFRM_STATE_ACQ) {
1015                                 xfrm_state_hold(x);
1016                                 return x;
1017                         }
1018                 }
1019         }
1020         return NULL;
1021 }
1022
1023 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1024 {
1025         struct xfrm_state *x;
1026
1027         spin_lock_bh(&xfrm_state_lock);
1028         x = __xfrm_find_acq_byseq(seq);
1029         spin_unlock_bh(&xfrm_state_lock);
1030         return x;
1031 }
1032 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1033
1034 u32 xfrm_get_acqseq(void)
1035 {
1036         u32 res;
1037         static u32 acqseq;
1038         static DEFINE_SPINLOCK(acqseq_lock);
1039
1040         spin_lock_bh(&acqseq_lock);
1041         res = (++acqseq ? : ++acqseq);
1042         spin_unlock_bh(&acqseq_lock);
1043         return res;
1044 }
1045 EXPORT_SYMBOL(xfrm_get_acqseq);
1046
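/* Assign an SPI to an SPI-less state.  With minspi == maxspi the single
 * value is used if it is free; otherwise random values in [minspi, maxspi]
 * are probed.  On success the state is linked into the byspi hash and
 * km_waitq is woken.
 */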
1047 void
1048 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1049 {
1050         unsigned int h;
1051         struct xfrm_state *x0;
1052
1053         if (x->id.spi)
1054                 return;
1055
1056         if (minspi == maxspi) {
1057                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1058                 if (x0) {
1059                         xfrm_state_put(x0);
1060                         return;
1061                 }
1062                 x->id.spi = minspi;
1063         } else {
1064                 u32 spi = 0;
1065                 u32 low = ntohl(minspi);
1066                 u32 high = ntohl(maxspi);
1067                 for (h=0; h<high-low+1; h++) {
1068                         spi = low + net_random()%(high-low+1);
1069                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1070                         if (x0 == NULL) {
1071                                 x->id.spi = htonl(spi);
1072                                 break;
1073                         }
1074                         xfrm_state_put(x0);
1075                 }
1076         }
1077         if (x->id.spi) {
1078                 spin_lock_bh(&xfrm_state_lock);
1079                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1080                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1081                 spin_unlock_bh(&xfrm_state_lock);
1082                 wake_up(&km_waitq);
1083         }
1084 }
1085 EXPORT_SYMBOL(xfrm_alloc_spi);
1086
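/* Iterate over all states whose protocol matches @proto, calling @func on
 * each with a countdown index (the last match sees 0).  Returns -ENOENT if
 * nothing matches.  xfrm_state_lock is held for the whole walk.
 */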
1087 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1088                     void *data)
1089 {
1090         int i;
1091         struct xfrm_state *x;
1092         struct hlist_node *entry;
1093         int count = 0;
1094         int err = 0;
1095
1096         spin_lock_bh(&xfrm_state_lock);
1097         for (i = 0; i <= xfrm_state_hmask; i++) {
1098                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1099                         if (xfrm_id_proto_match(x->id.proto, proto))
1100                                 count++;
1101                 }
1102         }
1103         if (count == 0) {
1104                 err = -ENOENT;
1105                 goto out;
1106         }
1107
1108         for (i = 0; i <= xfrm_state_hmask; i++) {
1109                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1110                         if (!xfrm_id_proto_match(x->id.proto, proto))
1111                                 continue;
1112                         err = func(x, --count, data);
1113                         if (err)
1114                                 goto out;
1115                 }
1116         }
1117 out:
1118         spin_unlock_bh(&xfrm_state_lock);
1119         return err;
1120 }
1121 EXPORT_SYMBOL(xfrm_state_walk);
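
/* Hypothetical usage sketch (the names below are illustrative only):
 *
 *	static int dump_one(struct xfrm_state *x, int count, void *ptr)
 *	{
 *		... count reaches 0 on the last matching state ...
 *		return 0;
 *	}
 *
 *	err = xfrm_state_walk(IPPROTO_ESP, dump_one, my_data);
 */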
1122
1123
1124 void xfrm_replay_notify(struct xfrm_state *x, int event)
1125 {
1126         struct km_event c;
1127         /* We send notify messages in two cases:
1128          *  1. we updated one of the sequence numbers and the seqno difference
1129          *     is at least x->replay_maxdiff; in this case we also update the
1130          *     timeout of our timer function
1131          *  2. x->replay_maxage has elapsed since the last update
1132          *     and there were changes
1133          *
1134          *  The state structure must be locked!
1135          */
1136
1137         switch (event) {
1138         case XFRM_REPLAY_UPDATE:
1139                 if (x->replay_maxdiff &&
1140                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1141                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1142                         if (x->xflags & XFRM_TIME_DEFER)
1143                                 event = XFRM_REPLAY_TIMEOUT;
1144                         else
1145                                 return;
1146                 }
1147
1148                 break;
1149
1150         case XFRM_REPLAY_TIMEOUT:
1151                 if ((x->replay.seq == x->preplay.seq) &&
1152                     (x->replay.bitmap == x->preplay.bitmap) &&
1153                     (x->replay.oseq == x->preplay.oseq)) {
1154                         x->xflags |= XFRM_TIME_DEFER;
1155                         return;
1156                 }
1157
1158                 break;
1159         }
1160
1161         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1162         c.event = XFRM_MSG_NEWAE;
1163         c.data.aevent = event;
1164         km_state_notify(x, &c);
1165
1166         if (x->replay_maxage &&
1167             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1168                 x->xflags &= ~XFRM_TIME_DEFER;
1169 }
1170 EXPORT_SYMBOL(xfrm_replay_notify);
1171
1172 static void xfrm_replay_timer_handler(unsigned long data)
1173 {
1174         struct xfrm_state *x = (struct xfrm_state*)data;
1175
1176         spin_lock(&x->lock);
1177
1178         if (x->km.state == XFRM_STATE_VALID) {
1179                 if (xfrm_aevent_is_on())
1180                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1181                 else
1182                         x->xflags |= XFRM_TIME_DEFER;
1183         }
1184
1185         spin_unlock(&x->lock);
1186 }
1187
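/* Anti-replay check: reject sequence number zero, anything older than the
 * replay window, and anything already marked in the replay bitmap.
 */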
1188 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1189 {
1190         u32 diff;
1191         u32 seq = ntohl(net_seq);
1192
1193         if (unlikely(seq == 0))
1194                 return -EINVAL;
1195
1196         if (likely(seq > x->replay.seq))
1197                 return 0;
1198
1199         diff = x->replay.seq - seq;
1200         if (diff >= x->props.replay_window) {
1201                 x->stats.replay_window++;
1202                 return -EINVAL;
1203         }
1204
1205         if (x->replay.bitmap & (1U << diff)) {
1206                 x->stats.replay++;
1207                 return -EINVAL;
1208         }
1209         return 0;
1210 }
1211 EXPORT_SYMBOL(xfrm_replay_check);
1212
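/* Record a verified sequence number: slide the replay window forward for a
 * new high value or set the corresponding bit for an in-window packet, then
 * notify the key manager if asynchronous events are enabled.
 */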
1213 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1214 {
1215         u32 diff;
1216         u32 seq = ntohl(net_seq);
1217
1218         if (seq > x->replay.seq) {
1219                 diff = seq - x->replay.seq;
1220                 if (diff < x->props.replay_window)
1221                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1222                 else
1223                         x->replay.bitmap = 1;
1224                 x->replay.seq = seq;
1225         } else {
1226                 diff = x->replay.seq - seq;
1227                 x->replay.bitmap |= (1U << diff);
1228         }
1229
1230         if (xfrm_aevent_is_on())
1231                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1232 }
1233 EXPORT_SYMBOL(xfrm_replay_advance);
1234
1235 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1236 static DEFINE_RWLOCK(xfrm_km_lock);
1237
1238 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1239 {
1240         struct xfrm_mgr *km;
1241
1242         read_lock(&xfrm_km_lock);
1243         list_for_each_entry(km, &xfrm_km_list, list)
1244                 if (km->notify_policy)
1245                         km->notify_policy(xp, dir, c);
1246         read_unlock(&xfrm_km_lock);
1247 }
1248
1249 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1250 {
1251         struct xfrm_mgr *km;
1252         read_lock(&xfrm_km_lock);
1253         list_for_each_entry(km, &xfrm_km_list, list)
1254                 if (km->notify)
1255                         km->notify(x, c);
1256         read_unlock(&xfrm_km_lock);
1257 }
1258
1259 EXPORT_SYMBOL(km_policy_notify);
1260 EXPORT_SYMBOL(km_state_notify);
1261
1262 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1263 {
1264         struct km_event c;
1265
1266         c.data.hard = hard;
1267         c.pid = pid;
1268         c.event = XFRM_MSG_EXPIRE;
1269         km_state_notify(x, &c);
1270
1271         if (hard)
1272                 wake_up(&km_waitq);
1273 }
1274
1275 EXPORT_SYMBOL(km_state_expired);
1276 /*
1277  * We send to all registered managers regardless of failure;
1278  * we are happy with one success.
1279  */
1280 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1281 {
1282         int err = -EINVAL, acqret;
1283         struct xfrm_mgr *km;
1284
1285         read_lock(&xfrm_km_lock);
1286         list_for_each_entry(km, &xfrm_km_list, list) {
1287                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1288                 if (!acqret)
1289                         err = acqret;
1290         }
1291         read_unlock(&xfrm_km_lock);
1292         return err;
1293 }
1294 EXPORT_SYMBOL(km_query);
1295
1296 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1297 {
1298         int err = -EINVAL;
1299         struct xfrm_mgr *km;
1300
1301         read_lock(&xfrm_km_lock);
1302         list_for_each_entry(km, &xfrm_km_list, list) {
1303                 if (km->new_mapping)
1304                         err = km->new_mapping(x, ipaddr, sport);
1305                 if (!err)
1306                         break;
1307         }
1308         read_unlock(&xfrm_km_lock);
1309         return err;
1310 }
1311 EXPORT_SYMBOL(km_new_mapping);
1312
1313 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1314 {
1315         struct km_event c;
1316
1317         c.data.hard = hard;
1318         c.pid = pid;
1319         c.event = XFRM_MSG_POLEXPIRE;
1320         km_policy_notify(pol, dir, &c);
1321
1322         if (hard)
1323                 wake_up(&km_waitq);
1324 }
1325 EXPORT_SYMBOL(km_policy_expired);
1326
1327 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1328 {
1329         int err = -EINVAL;
1330         int ret;
1331         struct xfrm_mgr *km;
1332
1333         read_lock(&xfrm_km_lock);
1334         list_for_each_entry(km, &xfrm_km_list, list) {
1335                 if (km->report) {
1336                         ret = km->report(proto, sel, addr);
1337                         if (!ret)
1338                                 err = ret;
1339                 }
1340         }
1341         read_unlock(&xfrm_km_lock);
1342         return err;
1343 }
1344 EXPORT_SYMBOL(km_report);
1345
1346 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1347 {
1348         int err;
1349         u8 *data;
1350         struct xfrm_mgr *km;
1351         struct xfrm_policy *pol = NULL;
1352
1353         if (optlen <= 0 || optlen > PAGE_SIZE)
1354                 return -EMSGSIZE;
1355
1356         data = kmalloc(optlen, GFP_KERNEL);
1357         if (!data)
1358                 return -ENOMEM;
1359
1360         err = -EFAULT;
1361         if (copy_from_user(data, optval, optlen))
1362                 goto out;
1363
1364         err = -EINVAL;
1365         read_lock(&xfrm_km_lock);
1366         list_for_each_entry(km, &xfrm_km_list, list) {
1367                 pol = km->compile_policy(sk, optname, data,
1368                                          optlen, &err);
1369                 if (err >= 0)
1370                         break;
1371         }
1372         read_unlock(&xfrm_km_lock);
1373
1374         if (err >= 0) {
1375                 xfrm_sk_policy_insert(sk, err, pol);
1376                 xfrm_pol_put(pol);
1377                 err = 0;
1378         }
1379
1380 out:
1381         kfree(data);
1382         return err;
1383 }
1384 EXPORT_SYMBOL(xfrm_user_policy);
1385
1386 int xfrm_register_km(struct xfrm_mgr *km)
1387 {
1388         write_lock_bh(&xfrm_km_lock);
1389         list_add_tail(&km->list, &xfrm_km_list);
1390         write_unlock_bh(&xfrm_km_lock);
1391         return 0;
1392 }
1393 EXPORT_SYMBOL(xfrm_register_km);
1394
1395 int xfrm_unregister_km(struct xfrm_mgr *km)
1396 {
1397         write_lock_bh(&xfrm_km_lock);
1398         list_del(&km->list);
1399         write_unlock_bh(&xfrm_km_lock);
1400         return 0;
1401 }
1402 EXPORT_SYMBOL(xfrm_unregister_km);
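
/* Typical key-manager registration, done by e.g. af_key and xfrm_user at
 * module init/exit time (the names below are an illustrative sketch, not
 * taken from this file):
 *
 *	static struct xfrm_mgr my_mgr = {
 *		.id		= "my_km",
 *		.notify		= my_notify,
 *		.acquire	= my_acquire,
 *		.compile_policy	= my_compile_policy,
 *	};
 *
 *	xfrm_register_km(&my_mgr);
 *	...
 *	xfrm_unregister_km(&my_mgr);
 */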
1403
1404 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1405 {
1406         int err = 0;
1407         if (unlikely(afinfo == NULL))
1408                 return -EINVAL;
1409         if (unlikely(afinfo->family >= NPROTO))
1410                 return -EAFNOSUPPORT;
1411         write_lock_bh(&xfrm_state_afinfo_lock);
1412         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1413                 err = -ENOBUFS;
1414         else
1415                 xfrm_state_afinfo[afinfo->family] = afinfo;
1416         write_unlock_bh(&xfrm_state_afinfo_lock);
1417         return err;
1418 }
1419 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1420
1421 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1422 {
1423         int err = 0;
1424         if (unlikely(afinfo == NULL))
1425                 return -EINVAL;
1426         if (unlikely(afinfo->family >= NPROTO))
1427                 return -EAFNOSUPPORT;
1428         write_lock_bh(&xfrm_state_afinfo_lock);
1429         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1430                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1431                         err = -EINVAL;
1432                 else
1433                         xfrm_state_afinfo[afinfo->family] = NULL;
1434         }
1435         write_unlock_bh(&xfrm_state_afinfo_lock);
1436         return err;
1437 }
1438 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1439
1440 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1441 {
1442         struct xfrm_state_afinfo *afinfo;
1443         if (unlikely(family >= NPROTO))
1444                 return NULL;
1445         read_lock(&xfrm_state_afinfo_lock);
1446         afinfo = xfrm_state_afinfo[family];
1447         if (unlikely(!afinfo))
1448                 read_unlock(&xfrm_state_afinfo_lock);
1449         return afinfo;
1450 }
1451
1452 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1453 {
1454         read_unlock(&xfrm_state_afinfo_lock);
1455 }
1456
1457 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1458 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1459 {
1460         if (x->tunnel) {
1461                 struct xfrm_state *t = x->tunnel;
1462
1463                 if (atomic_read(&t->tunnel_users) == 2)
1464                         xfrm_state_delete(t);
1465                 atomic_dec(&t->tunnel_users);
1466                 xfrm_state_put(t);
1467                 x->tunnel = NULL;
1468         }
1469 }
1470 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1471
1472 /*
1473  * This function is NOT optimal.  For example, with ESP it will give an
1474  * MTU that's usually two bytes short of being optimal.  However, it will
1475  * usually give an answer that's a multiple of 4 provided the input is
1476  * also a multiple of 4.
1477  */
1478 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1479 {
1480         int res = mtu;
1481
1482         res -= x->props.header_len;
1483
1484         for (;;) {
1485                 int m = res;
1486
1487                 if (m < 68)
1488                         return 68;
1489
1490                 spin_lock_bh(&x->lock);
1491                 if (x->km.state == XFRM_STATE_VALID &&
1492                     x->type && x->type->get_max_size)
1493                         m = x->type->get_max_size(x, m);
1494                 else
1495                         m += x->props.header_len;
1496                 spin_unlock_bh(&x->lock);
1497
1498                 if (m <= mtu)
1499                         break;
1500                 res -= (m - mtu);
1501         }
1502
1503         return res;
1504 }
1505
1506 int xfrm_init_state(struct xfrm_state *x)
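/* Finish construction of a user-supplied state: apply the per-family init
 * flags, resolve and initialize the xfrm type for x->id.proto and the mode
 * for x->props.mode, then mark the state VALID.
 */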
1507 {
1508         struct xfrm_state_afinfo *afinfo;
1509         int family = x->props.family;
1510         int err;
1511
1512         err = -EAFNOSUPPORT;
1513         afinfo = xfrm_state_get_afinfo(family);
1514         if (!afinfo)
1515                 goto error;
1516
1517         err = 0;
1518         if (afinfo->init_flags)
1519                 err = afinfo->init_flags(x);
1520
1521         xfrm_state_put_afinfo(afinfo);
1522
1523         if (err)
1524                 goto error;
1525
1526         err = -EPROTONOSUPPORT;
1527         x->type = xfrm_get_type(x->id.proto, family);
1528         if (x->type == NULL)
1529                 goto error;
1530
1531         err = x->type->init_state(x);
1532         if (err)
1533                 goto error;
1534
1535         x->mode = xfrm_get_mode(x->props.mode, family);
1536         if (x->mode == NULL)
1537                 goto error;
1538
1539         x->km.state = XFRM_STATE_VALID;
1540
1541 error:
1542         return err;
1543 }
1544
1545 EXPORT_SYMBOL(xfrm_init_state);
1546  
1547 void __init xfrm_state_init(void)
1548 {
1549         unsigned int sz;
1550
1551         sz = sizeof(struct hlist_head) * 8;
1552
1553         xfrm_state_bydst = xfrm_hash_alloc(sz);
1554         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1555         xfrm_state_byspi = xfrm_hash_alloc(sz);
1556         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1557                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1558         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1559
1560         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1561 }
1562