Remove obsolete #include <linux/config.h>
[safe/jmp/linux-2.6] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68
69 struct rsvp_head
70 {
71         u32                     tmap[256/32];
72         u32                     hgenerator;
73         u8                      tgenerator;
74         struct rsvp_session     *ht[256];
75 };
76
77 struct rsvp_session
78 {
79         struct rsvp_session     *next;
80         u32                     dst[RSVP_DST_LEN];
81         struct tc_rsvp_gpi      dpi;
82         u8                      protocol;
83         u8                      tunnelid;
84         /* 16 (src,sport) hash slots, and one wildcard source slot */
85         struct rsvp_filter      *ht[16+1];
86 };
87
88
89 struct rsvp_filter
90 {
91         struct rsvp_filter      *next;
92         u32                     src[RSVP_DST_LEN];
93         struct tc_rsvp_gpi      spi;
94         u8                      tunnelhdr;
95
96         struct tcf_result       res;
97         struct tcf_exts         exts;
98
99         u32                     handle;
100         struct rsvp_session     *sess;
101 };
102
103 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
104 {
105         unsigned h = dst[RSVP_DST_LEN-1];
106         h ^= h>>16;
107         h ^= h>>8;
108         return (h ^ protocol ^ tunnelid) & 0xFF;
109 }
110
111 static __inline__ unsigned hash_src(u32 *src)
112 {
113         unsigned h = src[RSVP_DST_LEN-1];
114         h ^= h>>16;
115         h ^= h>>8;
116         h ^= h>>4;
117         return h & 0xF;
118 }
119
120 static struct tcf_ext_map rsvp_ext_map = {
121         .police = TCA_RSVP_POLICE,
122         .action = TCA_RSVP_ACT
123 };
124
/*
 * Run the extensions of filter `f` on `skb` and act on the verdict.
 * Expects `skb`, `f` and `res` to be in scope and must be expanded
 * directly inside the loop that walks the filters:
 *   > 0  return that value from the enclosing function,
 *   < 0  `continue` with the next filter,
 *   == 0 fall through to the code after the macro.
 *
 * This is a bare block on purpose.  Wrapping it in do { } while (0)
 * would make the `continue` apply to that wrapper, not to the caller's
 * loop.
 */
#define RSVP_APPLY_RESULT()				\
{							\
	int r = tcf_exts_exec(skb, &f->exts, res);	\
	if (r > 0)					\
		return r;				\
	if (r < 0)					\
		continue;				\
}
133         
134 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135                          struct tcf_result *res)
136 {
137         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138         struct rsvp_session *s;
139         struct rsvp_filter *f;
140         unsigned h1, h2;
141         u32 *dst, *src;
142         u8 protocol;
143         u8 tunnelid = 0;
144         u8 *xprt;
145 #if RSVP_DST_LEN == 4
146         struct ipv6hdr *nhptr = skb->nh.ipv6h;
147 #else
148         struct iphdr *nhptr = skb->nh.iph;
149 #endif
150
151 restart:
152
153 #if RSVP_DST_LEN == 4
154         src = &nhptr->saddr.s6_addr32[0];
155         dst = &nhptr->daddr.s6_addr32[0];
156         protocol = nhptr->nexthdr;
157         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158 #else
159         src = &nhptr->saddr;
160         dst = &nhptr->daddr;
161         protocol = nhptr->protocol;
162         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
163         if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
164                 return -1;
165 #endif
166
167         h1 = hash_dst(dst, protocol, tunnelid);
168         h2 = hash_src(src);
169
170         for (s = sht[h1]; s; s = s->next) {
171                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172                     protocol == s->protocol &&
173                     !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
174 #if RSVP_DST_LEN == 4
175                     && dst[0] == s->dst[0]
176                     && dst[1] == s->dst[1]
177                     && dst[2] == s->dst[2]
178 #endif
179                     && tunnelid == s->tunnelid) {
180
181                         for (f = s->ht[h2]; f; f = f->next) {
182                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
183                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
184 #if RSVP_DST_LEN == 4
185                                     && src[0] == f->src[0]
186                                     && src[1] == f->src[1]
187                                     && src[2] == f->src[2]
188 #endif
189                                     ) {
190                                         *res = f->res;
191                                         RSVP_APPLY_RESULT();
192
193 matched:
194                                         if (f->tunnelhdr == 0)
195                                                 return 0;
196
197                                         tunnelid = f->res.classid;
198                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
199                                         goto restart;
200                                 }
201                         }
202
203                         /* And wildcard bucket... */
204                         for (f = s->ht[16]; f; f = f->next) {
205                                 *res = f->res;
206                                 RSVP_APPLY_RESULT();
207                                 goto matched;
208                         }
209                         return -1;
210                 }
211         }
212         return -1;
213 }
214
215 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216 {
217         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218         struct rsvp_session *s;
219         struct rsvp_filter *f;
220         unsigned h1 = handle&0xFF;
221         unsigned h2 = (handle>>8)&0xFF;
222
223         if (h2 > 16)
224                 return 0;
225
226         for (s = sht[h1]; s; s = s->next) {
227                 for (f = s->ht[h2]; f; f = f->next) {
228                         if (f->handle == handle)
229                                 return (unsigned long)f;
230                 }
231         }
232         return 0;
233 }
234
/* Counterpart of rsvp_get(), which takes no reference: nothing to drop. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* intentionally empty */
}
238
239 static int rsvp_init(struct tcf_proto *tp)
240 {
241         struct rsvp_head *data;
242
243         data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244         if (data) {
245                 memset(data, 0, sizeof(struct rsvp_head));
246                 tp->root = data;
247                 return 0;
248         }
249         return -ENOBUFS;
250 }
251
252 static inline void
253 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
254 {
255         tcf_unbind_filter(tp, &f->res);
256         tcf_exts_destroy(tp, &f->exts);
257         kfree(f);
258 }
259
260 static void rsvp_destroy(struct tcf_proto *tp)
261 {
262         struct rsvp_head *data = xchg(&tp->root, NULL);
263         struct rsvp_session **sht;
264         int h1, h2;
265
266         if (data == NULL)
267                 return;
268
269         sht = data->ht;
270
271         for (h1=0; h1<256; h1++) {
272                 struct rsvp_session *s;
273
274                 while ((s = sht[h1]) != NULL) {
275                         sht[h1] = s->next;
276
277                         for (h2=0; h2<=16; h2++) {
278                                 struct rsvp_filter *f;
279
280                                 while ((f = s->ht[h2]) != NULL) {
281                                         s->ht[h2] = f->next;
282                                         rsvp_delete_filter(tp, f);
283                                 }
284                         }
285                         kfree(s);
286                 }
287         }
288         kfree(data);
289 }
290
291 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
292 {
293         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
294         unsigned h = f->handle;
295         struct rsvp_session **sp;
296         struct rsvp_session *s = f->sess;
297         int i;
298
299         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
300                 if (*fp == f) {
301                         tcf_tree_lock(tp);
302                         *fp = f->next;
303                         tcf_tree_unlock(tp);
304                         rsvp_delete_filter(tp, f);
305
306                         /* Strip tree */
307
308                         for (i=0; i<=16; i++)
309                                 if (s->ht[i])
310                                         return 0;
311
312                         /* OK, session has no flows */
313                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
314                              *sp; sp = &(*sp)->next) {
315                                 if (*sp == s) {
316                                         tcf_tree_lock(tp);
317                                         *sp = s->next;
318                                         tcf_tree_unlock(tp);
319
320                                         kfree(s);
321                                         return 0;
322                                 }
323                         }
324
325                         return 0;
326                 }
327         }
328         return 0;
329 }
330
331 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
332 {
333         struct rsvp_head *data = tp->root;
334         int i = 0xFFFF;
335
336         while (i-- > 0) {
337                 u32 h;
338                 if ((data->hgenerator += 0x10000) == 0)
339                         data->hgenerator = 0x10000;
340                 h = data->hgenerator|salt;
341                 if (rsvp_get(tp, h) == 0)
342                         return h;
343         }
344         return 0;
345 }
346
347 static int tunnel_bts(struct rsvp_head *data)
348 {
349         int n = data->tgenerator>>5;
350         u32 b = 1<<(data->tgenerator&0x1F);
351         
352         if (data->tmap[n]&b)
353                 return 0;
354         data->tmap[n] |= b;
355         return 1;
356 }
357
358 static void tunnel_recycle(struct rsvp_head *data)
359 {
360         struct rsvp_session **sht = data->ht;
361         u32 tmap[256/32];
362         int h1, h2;
363
364         memset(tmap, 0, sizeof(tmap));
365
366         for (h1=0; h1<256; h1++) {
367                 struct rsvp_session *s;
368                 for (s = sht[h1]; s; s = s->next) {
369                         for (h2=0; h2<=16; h2++) {
370                                 struct rsvp_filter *f;
371
372                                 for (f = s->ht[h2]; f; f = f->next) {
373                                         if (f->tunnelhdr == 0)
374                                                 continue;
375                                         data->tgenerator = f->res.classid;
376                                         tunnel_bts(data);
377                                 }
378                         }
379                 }
380         }
381
382         memcpy(data->tmap, tmap, sizeof(tmap));
383 }
384
385 static u32 gen_tunnel(struct rsvp_head *data)
386 {
387         int i, k;
388
389         for (k=0; k<2; k++) {
390                 for (i=255; i>0; i--) {
391                         if (++data->tgenerator == 0)
392                                 data->tgenerator = 1;
393                         if (tunnel_bts(data))
394                                 return data->tgenerator;
395                 }
396                 tunnel_recycle(data);
397         }
398         return 0;
399 }
400
401 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
402                        u32 handle,
403                        struct rtattr **tca,
404                        unsigned long *arg)
405 {
406         struct rsvp_head *data = tp->root;
407         struct rsvp_filter *f, **fp;
408         struct rsvp_session *s, **sp;
409         struct tc_rsvp_pinfo *pinfo = NULL;
410         struct rtattr *opt = tca[TCA_OPTIONS-1];
411         struct rtattr *tb[TCA_RSVP_MAX];
412         struct tcf_exts e;
413         unsigned h1, h2;
414         u32 *dst;
415         int err;
416
417         if (opt == NULL)
418                 return handle ? -EINVAL : 0;
419
420         if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
421                 return -EINVAL;
422
423         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
424         if (err < 0)
425                 return err;
426
427         if ((f = (struct rsvp_filter*)*arg) != NULL) {
428                 /* Node exists: adjust only classid */
429
430                 if (f->handle != handle && handle)
431                         goto errout2;
432                 if (tb[TCA_RSVP_CLASSID-1]) {
433                         f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
434                         tcf_bind_filter(tp, &f->res, base);
435                 }
436
437                 tcf_exts_change(tp, &f->exts, &e);
438                 return 0;
439         }
440
441         /* Now more serious part... */
442         err = -EINVAL;
443         if (handle)
444                 goto errout2;
445         if (tb[TCA_RSVP_DST-1] == NULL)
446                 goto errout2;
447
448         err = -ENOBUFS;
449         f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
450         if (f == NULL)
451                 goto errout2;
452
453         memset(f, 0, sizeof(*f));
454         h2 = 16;
455         if (tb[TCA_RSVP_SRC-1]) {
456                 err = -EINVAL;
457                 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
458                         goto errout;
459                 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
460                 h2 = hash_src(f->src);
461         }
462         if (tb[TCA_RSVP_PINFO-1]) {
463                 err = -EINVAL;
464                 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
465                         goto errout;
466                 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
467                 f->spi = pinfo->spi;
468                 f->tunnelhdr = pinfo->tunnelhdr;
469         }
470         if (tb[TCA_RSVP_CLASSID-1]) {
471                 err = -EINVAL;
472                 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
473                         goto errout;
474                 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
475         }
476
477         err = -EINVAL;
478         if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
479                 goto errout;
480         dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
481         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
482
483         err = -ENOMEM;
484         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
485                 goto errout;
486
487         if (f->tunnelhdr) {
488                 err = -EINVAL;
489                 if (f->res.classid > 255)
490                         goto errout;
491
492                 err = -ENOMEM;
493                 if (f->res.classid == 0 &&
494                     (f->res.classid = gen_tunnel(data)) == 0)
495                         goto errout;
496         }
497
498         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
499                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
500                     pinfo && pinfo->protocol == s->protocol &&
501                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
502 #if RSVP_DST_LEN == 4
503                     && dst[0] == s->dst[0]
504                     && dst[1] == s->dst[1]
505                     && dst[2] == s->dst[2]
506 #endif
507                     && pinfo->tunnelid == s->tunnelid) {
508
509 insert:
510                         /* OK, we found appropriate session */
511
512                         fp = &s->ht[h2];
513
514                         f->sess = s;
515                         if (f->tunnelhdr == 0)
516                                 tcf_bind_filter(tp, &f->res, base);
517
518                         tcf_exts_change(tp, &f->exts, &e);
519
520                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
521                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
522                                         break;
523                         f->next = *fp;
524                         wmb();
525                         *fp = f;
526
527                         *arg = (unsigned long)f;
528                         return 0;
529                 }
530         }
531
532         /* No session found. Create new one. */
533
534         err = -ENOBUFS;
535         s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
536         if (s == NULL)
537                 goto errout;
538         memset(s, 0, sizeof(*s));
539         memcpy(s->dst, dst, sizeof(s->dst));
540
541         if (pinfo) {
542                 s->dpi = pinfo->dpi;
543                 s->protocol = pinfo->protocol;
544                 s->tunnelid = pinfo->tunnelid;
545         }
546         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
547                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
548                         break;
549         }
550         s->next = *sp;
551         wmb();
552         *sp = s;
553         
554         goto insert;
555
556 errout:
557         kfree(f);
558 errout2:
559         tcf_exts_destroy(tp, &e);
560         return err;
561 }
562
563 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
564 {
565         struct rsvp_head *head = tp->root;
566         unsigned h, h1;
567
568         if (arg->stop)
569                 return;
570
571         for (h = 0; h < 256; h++) {
572                 struct rsvp_session *s;
573
574                 for (s = head->ht[h]; s; s = s->next) {
575                         for (h1 = 0; h1 <= 16; h1++) {
576                                 struct rsvp_filter *f;
577
578                                 for (f = s->ht[h1]; f; f = f->next) {
579                                         if (arg->count < arg->skip) {
580                                                 arg->count++;
581                                                 continue;
582                                         }
583                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
584                                                 arg->stop = 1;
585                                                 return;
586                                         }
587                                         arg->count++;
588                                 }
589                         }
590                 }
591         }
592 }
593
594 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
595                      struct sk_buff *skb, struct tcmsg *t)
596 {
597         struct rsvp_filter *f = (struct rsvp_filter*)fh;
598         struct rsvp_session *s;
599         unsigned char    *b = skb->tail;
600         struct rtattr *rta;
601         struct tc_rsvp_pinfo pinfo;
602
603         if (f == NULL)
604                 return skb->len;
605         s = f->sess;
606
607         t->tcm_handle = f->handle;
608
609
610         rta = (struct rtattr*)b;
611         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
612
613         RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
614         pinfo.dpi = s->dpi;
615         pinfo.spi = f->spi;
616         pinfo.protocol = s->protocol;
617         pinfo.tunnelid = s->tunnelid;
618         pinfo.tunnelhdr = f->tunnelhdr;
619         pinfo.pad = 0;
620         RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
621         if (f->res.classid)
622                 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
623         if (((f->handle>>8)&0xFF) != 16)
624                 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
625
626         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
627                 goto rtattr_failure;
628
629         rta->rta_len = skb->tail - b;
630
631         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
632                 goto rtattr_failure;
633         return skb->len;
634
635 rtattr_failure:
636         skb_trim(skb, b - skb->data);
637         return -1;
638 }
639
640 static struct tcf_proto_ops RSVP_OPS = {
641         .next           =       NULL,
642         .kind           =       RSVP_ID,
643         .classify       =       rsvp_classify,
644         .init           =       rsvp_init,
645         .destroy        =       rsvp_destroy,
646         .get            =       rsvp_get,
647         .put            =       rsvp_put,
648         .change         =       rsvp_change,
649         .delete         =       rsvp_delete,
650         .walk           =       rsvp_walk,
651         .dump           =       rsvp_dump,
652         .owner          =       THIS_MODULE,
653 };
654
655 static int __init init_rsvp(void)
656 {
657         return register_tcf_proto_ops(&RSVP_OPS);
658 }
659
660 static void __exit exit_rsvp(void) 
661 {
662         unregister_tcf_proto_ops(&RSVP_OPS);
663 }
664
665 module_init(init_rsvp)
666 module_exit(exit_rsvp)