[NET_SCHED]: Use typeful attribute construction helpers
[safe/jmp/linux-2.6] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68
69 struct rsvp_head
70 {
71         u32                     tmap[256/32];
72         u32                     hgenerator;
73         u8                      tgenerator;
74         struct rsvp_session     *ht[256];
75 };
76
77 struct rsvp_session
78 {
79         struct rsvp_session     *next;
80         __be32                  dst[RSVP_DST_LEN];
81         struct tc_rsvp_gpi      dpi;
82         u8                      protocol;
83         u8                      tunnelid;
84         /* 16 (src,sport) hash slots, and one wildcard source slot */
85         struct rsvp_filter      *ht[16+1];
86 };
87
88
89 struct rsvp_filter
90 {
91         struct rsvp_filter      *next;
92         __be32                  src[RSVP_DST_LEN];
93         struct tc_rsvp_gpi      spi;
94         u8                      tunnelhdr;
95
96         struct tcf_result       res;
97         struct tcf_exts         exts;
98
99         u32                     handle;
100         struct rsvp_session     *sess;
101 };
102
103 static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104 {
105         unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106         h ^= h>>16;
107         h ^= h>>8;
108         return (h ^ protocol ^ tunnelid) & 0xFF;
109 }
110
111 static __inline__ unsigned hash_src(__be32 *src)
112 {
113         unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114         h ^= h>>16;
115         h ^= h>>8;
116         h ^= h>>4;
117         return h & 0xF;
118 }
119
120 static struct tcf_ext_map rsvp_ext_map = {
121         .police = TCA_RSVP_POLICE,
122         .action = TCA_RSVP_ACT
123 };
124
/*
 * Run the extensions of the candidate filter.  Expects `skb', `f' and
 * `res' to be in scope and must be used inside a loop over filters:
 * a negative result skips to the next filter via `continue', a positive
 * result is returned from the enclosing function, zero falls through.
 * Deliberately a bare block rather than do { } while (0), so that
 * `continue' reaches the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int r = tcf_exts_exec(skb, &f->exts, res);		\
	if (r < 0)						\
		continue;					\
	if (r > 0)						\
		return r;					\
}
133
/*
 * rsvp_classify - look a packet up in the session/flow tables.
 *
 * Finds the session matching (dst, protocol, DPI, tunnelid), then a flow in
 * that session matching (src, SPI), falling back to the session's wildcard
 * slot.  On a match *res is set from the filter.
 *
 * Returns 0 on a final match, a positive value handed back by
 * tcf_exts_exec() (via RSVP_APPLY_RESULT), or -1 when nothing matches or,
 * for IPv4, when the packet is a fragment.
 *
 * NOTE(review): the 32-bit loads at xprt + {dpi,spi}.offset and the re-based
 * header pointer used for tunnels are not checked against the packet length
 * anywhere in this function -- confirm the callers guarantee enough data.
 */
134 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135                          struct tcf_result *res)
136 {
137         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138         struct rsvp_session *s;
139         struct rsvp_filter *f;
140         unsigned h1, h2;
141         __be32 *dst, *src;
142         u8 protocol;
143         u8 tunnelid = 0;
144         u8 *xprt;
145 #if RSVP_DST_LEN == 4
146         struct ipv6hdr *nhptr = ipv6_hdr(skb);
147 #else
148         struct iphdr *nhptr = ip_hdr(skb);
149 #endif
150
        /* Re-entered with a new nhptr/tunnelid after a tunnel filter matched. */
151 restart:
152
153 #if RSVP_DST_LEN == 4
154         src = &nhptr->saddr.s6_addr32[0];
155         dst = &nhptr->daddr.s6_addr32[0];
156         protocol = nhptr->nexthdr;
157         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158 #else
159         src = &nhptr->saddr;
160         dst = &nhptr->daddr;
161         protocol = nhptr->protocol;
162         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
        /* IPv4 fragments (MF set or non-zero offset) are never classified. */
163         if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
164                 return -1;
165 #endif
166
167         h1 = hash_dst(dst, protocol, tunnelid);
168         h2 = hash_src(src);
169
        /* Session match: dst address, protocol, masked DPI word, tunnel ID. */
170         for (s = sht[h1]; s; s = s->next) {
171                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172                     protocol == s->protocol &&
173                     !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
174 #if RSVP_DST_LEN == 4
175                     && dst[0] == s->dst[0]
176                     && dst[1] == s->dst[1]
177                     && dst[2] == s->dst[2]
178 #endif
179                     && tunnelid == s->tunnelid) {
180
                        /* Exact-source flows: src address and masked SPI word. */
181                         for (f = s->ht[h2]; f; f = f->next) {
182                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
183                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
184 #if RSVP_DST_LEN == 4
185                                     && src[0] == f->src[0]
186                                     && src[1] == f->src[1]
187                                     && src[2] == f->src[2]
188 #endif
189                                     ) {
190                                         *res = f->res;
                                        /* May `continue' to the next f or return > 0. */
191                                         RSVP_APPLY_RESULT();
192
                                        /* Also the jump target of the wildcard loop below. */
193 matched:
194                                         if (f->tunnelhdr == 0)
195                                                 return 0;
196
                                        /*
                                         * Tunnel filter: classid is the tunnel ID.
                                         * Re-base nhptr so that the inner header ends
                                         * tunnelhdr bytes past the current xprt, then
                                         * look up again with the tunnel ID as a key.
                                         */
197                                         tunnelid = f->res.classid;
198                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
199                                         goto restart;
200                                 }
201                         }
202
203                         /* And wildcard bucket... */
                        /* No address/SPI test here: first accepted entry wins. */
204                         for (f = s->ht[16]; f; f = f->next) {
205                                 *res = f->res;
206                                 RSVP_APPLY_RESULT();
207                                 goto matched;
208                         }
                        /* Session matched but no flow did: do not try other sessions. */
209                         return -1;
210                 }
211         }
212         return -1;
213 }
214
215 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216 {
217         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218         struct rsvp_session *s;
219         struct rsvp_filter *f;
220         unsigned h1 = handle&0xFF;
221         unsigned h2 = (handle>>8)&0xFF;
222
223         if (h2 > 16)
224                 return 0;
225
226         for (s = sht[h1]; s; s = s->next) {
227                 for (f = s->ht[h2]; f; f = f->next) {
228                         if (f->handle == handle)
229                                 return (unsigned long)f;
230                 }
231         }
232         return 0;
233 }
234
/* Counterpart of rsvp_get(); rsvp_get() takes no reference, so nothing to drop. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* intentionally empty */
}
238
239 static int rsvp_init(struct tcf_proto *tp)
240 {
241         struct rsvp_head *data;
242
243         data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244         if (data) {
245                 tp->root = data;
246                 return 0;
247         }
248         return -ENOBUFS;
249 }
250
251 static inline void
252 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
253 {
254         tcf_unbind_filter(tp, &f->res);
255         tcf_exts_destroy(tp, &f->exts);
256         kfree(f);
257 }
258
259 static void rsvp_destroy(struct tcf_proto *tp)
260 {
261         struct rsvp_head *data = xchg(&tp->root, NULL);
262         struct rsvp_session **sht;
263         int h1, h2;
264
265         if (data == NULL)
266                 return;
267
268         sht = data->ht;
269
270         for (h1=0; h1<256; h1++) {
271                 struct rsvp_session *s;
272
273                 while ((s = sht[h1]) != NULL) {
274                         sht[h1] = s->next;
275
276                         for (h2=0; h2<=16; h2++) {
277                                 struct rsvp_filter *f;
278
279                                 while ((f = s->ht[h2]) != NULL) {
280                                         s->ht[h2] = f->next;
281                                         rsvp_delete_filter(tp, f);
282                                 }
283                         }
284                         kfree(s);
285                 }
286         }
287         kfree(data);
288 }
289
290 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
291 {
292         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
293         unsigned h = f->handle;
294         struct rsvp_session **sp;
295         struct rsvp_session *s = f->sess;
296         int i;
297
298         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
299                 if (*fp == f) {
300                         tcf_tree_lock(tp);
301                         *fp = f->next;
302                         tcf_tree_unlock(tp);
303                         rsvp_delete_filter(tp, f);
304
305                         /* Strip tree */
306
307                         for (i=0; i<=16; i++)
308                                 if (s->ht[i])
309                                         return 0;
310
311                         /* OK, session has no flows */
312                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
313                              *sp; sp = &(*sp)->next) {
314                                 if (*sp == s) {
315                                         tcf_tree_lock(tp);
316                                         *sp = s->next;
317                                         tcf_tree_unlock(tp);
318
319                                         kfree(s);
320                                         return 0;
321                                 }
322                         }
323
324                         return 0;
325                 }
326         }
327         return 0;
328 }
329
330 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
331 {
332         struct rsvp_head *data = tp->root;
333         int i = 0xFFFF;
334
335         while (i-- > 0) {
336                 u32 h;
337                 if ((data->hgenerator += 0x10000) == 0)
338                         data->hgenerator = 0x10000;
339                 h = data->hgenerator|salt;
340                 if (rsvp_get(tp, h) == 0)
341                         return h;
342         }
343         return 0;
344 }
345
346 static int tunnel_bts(struct rsvp_head *data)
347 {
348         int n = data->tgenerator>>5;
349         u32 b = 1<<(data->tgenerator&0x1F);
350
351         if (data->tmap[n]&b)
352                 return 0;
353         data->tmap[n] |= b;
354         return 1;
355 }
356
357 static void tunnel_recycle(struct rsvp_head *data)
358 {
359         struct rsvp_session **sht = data->ht;
360         u32 tmap[256/32];
361         int h1, h2;
362
363         memset(tmap, 0, sizeof(tmap));
364
365         for (h1=0; h1<256; h1++) {
366                 struct rsvp_session *s;
367                 for (s = sht[h1]; s; s = s->next) {
368                         for (h2=0; h2<=16; h2++) {
369                                 struct rsvp_filter *f;
370
371                                 for (f = s->ht[h2]; f; f = f->next) {
372                                         if (f->tunnelhdr == 0)
373                                                 continue;
374                                         data->tgenerator = f->res.classid;
375                                         tunnel_bts(data);
376                                 }
377                         }
378                 }
379         }
380
381         memcpy(data->tmap, tmap, sizeof(tmap));
382 }
383
384 static u32 gen_tunnel(struct rsvp_head *data)
385 {
386         int i, k;
387
388         for (k=0; k<2; k++) {
389                 for (i=255; i>0; i--) {
390                         if (++data->tgenerator == 0)
391                                 data->tgenerator = 1;
392                         if (tunnel_bts(data))
393                                 return data->tgenerator;
394                 }
395                 tunnel_recycle(data);
396         }
397         return 0;
398 }
399
400 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
401                        u32 handle,
402                        struct nlattr **tca,
403                        unsigned long *arg)
404 {
405         struct rsvp_head *data = tp->root;
406         struct rsvp_filter *f, **fp;
407         struct rsvp_session *s, **sp;
408         struct tc_rsvp_pinfo *pinfo = NULL;
409         struct nlattr *opt = tca[TCA_OPTIONS-1];
410         struct nlattr *tb[TCA_RSVP_MAX + 1];
411         struct tcf_exts e;
412         unsigned h1, h2;
413         __be32 *dst;
414         int err;
415
416         if (opt == NULL)
417                 return handle ? -EINVAL : 0;
418
419         err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, NULL);
420         if (err < 0)
421                 return err;
422
423         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
424         if (err < 0)
425                 return err;
426
427         if ((f = (struct rsvp_filter*)*arg) != NULL) {
428                 /* Node exists: adjust only classid */
429
430                 if (f->handle != handle && handle)
431                         goto errout2;
432                 if (tb[TCA_RSVP_CLASSID-1]) {
433                         f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
434                         tcf_bind_filter(tp, &f->res, base);
435                 }
436
437                 tcf_exts_change(tp, &f->exts, &e);
438                 return 0;
439         }
440
441         /* Now more serious part... */
442         err = -EINVAL;
443         if (handle)
444                 goto errout2;
445         if (tb[TCA_RSVP_DST-1] == NULL)
446                 goto errout2;
447
448         err = -ENOBUFS;
449         f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
450         if (f == NULL)
451                 goto errout2;
452
453         h2 = 16;
454         if (tb[TCA_RSVP_SRC-1]) {
455                 err = -EINVAL;
456                 if (nla_len(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
457                         goto errout;
458                 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
459                 h2 = hash_src(f->src);
460         }
461         if (tb[TCA_RSVP_PINFO-1]) {
462                 err = -EINVAL;
463                 if (nla_len(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
464                         goto errout;
465                 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
466                 f->spi = pinfo->spi;
467                 f->tunnelhdr = pinfo->tunnelhdr;
468         }
469         if (tb[TCA_RSVP_CLASSID-1]) {
470                 err = -EINVAL;
471                 if (nla_len(tb[TCA_RSVP_CLASSID-1]) != 4)
472                         goto errout;
473                 f->res.classid = *(u32*)nla_data(tb[TCA_RSVP_CLASSID-1]);
474         }
475
476         err = -EINVAL;
477         if (nla_len(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
478                 goto errout;
479         dst = nla_data(tb[TCA_RSVP_DST-1]);
480         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
481
482         err = -ENOMEM;
483         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
484                 goto errout;
485
486         if (f->tunnelhdr) {
487                 err = -EINVAL;
488                 if (f->res.classid > 255)
489                         goto errout;
490
491                 err = -ENOMEM;
492                 if (f->res.classid == 0 &&
493                     (f->res.classid = gen_tunnel(data)) == 0)
494                         goto errout;
495         }
496
497         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
498                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
499                     pinfo && pinfo->protocol == s->protocol &&
500                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
501 #if RSVP_DST_LEN == 4
502                     && dst[0] == s->dst[0]
503                     && dst[1] == s->dst[1]
504                     && dst[2] == s->dst[2]
505 #endif
506                     && pinfo->tunnelid == s->tunnelid) {
507
508 insert:
509                         /* OK, we found appropriate session */
510
511                         fp = &s->ht[h2];
512
513                         f->sess = s;
514                         if (f->tunnelhdr == 0)
515                                 tcf_bind_filter(tp, &f->res, base);
516
517                         tcf_exts_change(tp, &f->exts, &e);
518
519                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
520                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
521                                         break;
522                         f->next = *fp;
523                         wmb();
524                         *fp = f;
525
526                         *arg = (unsigned long)f;
527                         return 0;
528                 }
529         }
530
531         /* No session found. Create new one. */
532
533         err = -ENOBUFS;
534         s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
535         if (s == NULL)
536                 goto errout;
537         memcpy(s->dst, dst, sizeof(s->dst));
538
539         if (pinfo) {
540                 s->dpi = pinfo->dpi;
541                 s->protocol = pinfo->protocol;
542                 s->tunnelid = pinfo->tunnelid;
543         }
544         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
545                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
546                         break;
547         }
548         s->next = *sp;
549         wmb();
550         *sp = s;
551
552         goto insert;
553
554 errout:
555         kfree(f);
556 errout2:
557         tcf_exts_destroy(tp, &e);
558         return err;
559 }
560
561 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
562 {
563         struct rsvp_head *head = tp->root;
564         unsigned h, h1;
565
566         if (arg->stop)
567                 return;
568
569         for (h = 0; h < 256; h++) {
570                 struct rsvp_session *s;
571
572                 for (s = head->ht[h]; s; s = s->next) {
573                         for (h1 = 0; h1 <= 16; h1++) {
574                                 struct rsvp_filter *f;
575
576                                 for (f = s->ht[h1]; f; f = f->next) {
577                                         if (arg->count < arg->skip) {
578                                                 arg->count++;
579                                                 continue;
580                                         }
581                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
582                                                 arg->stop = 1;
583                                                 return;
584                                         }
585                                         arg->count++;
586                                 }
587                         }
588                 }
589         }
590 }
591
592 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
593                      struct sk_buff *skb, struct tcmsg *t)
594 {
595         struct rsvp_filter *f = (struct rsvp_filter*)fh;
596         struct rsvp_session *s;
597         unsigned char *b = skb_tail_pointer(skb);
598         struct nlattr *nest;
599         struct tc_rsvp_pinfo pinfo;
600
601         if (f == NULL)
602                 return skb->len;
603         s = f->sess;
604
605         t->tcm_handle = f->handle;
606
607         nest = nla_nest_start(skb, TCA_OPTIONS);
608         if (nest == NULL)
609                 goto nla_put_failure;
610
611         NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
612         pinfo.dpi = s->dpi;
613         pinfo.spi = f->spi;
614         pinfo.protocol = s->protocol;
615         pinfo.tunnelid = s->tunnelid;
616         pinfo.tunnelhdr = f->tunnelhdr;
617         pinfo.pad = 0;
618         NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
619         if (f->res.classid)
620                 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
621         if (((f->handle>>8)&0xFF) != 16)
622                 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
623
624         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
625                 goto nla_put_failure;
626
627         nla_nest_end(skb, nest);
628
629         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
630                 goto nla_put_failure;
631         return skb->len;
632
633 nla_put_failure:
634         nlmsg_trim(skb, b);
635         return -1;
636 }
637
638 static struct tcf_proto_ops RSVP_OPS = {
639         .next           =       NULL,
640         .kind           =       RSVP_ID,
641         .classify       =       rsvp_classify,
642         .init           =       rsvp_init,
643         .destroy        =       rsvp_destroy,
644         .get            =       rsvp_get,
645         .put            =       rsvp_put,
646         .change         =       rsvp_change,
647         .delete         =       rsvp_delete,
648         .walk           =       rsvp_walk,
649         .dump           =       rsvp_dump,
650         .owner          =       THIS_MODULE,
651 };
652
653 static int __init init_rsvp(void)
654 {
655         return register_tcf_proto_ops(&RSVP_OPS);
656 }
657
658 static void __exit exit_rsvp(void)
659 {
660         unregister_tcf_proto_ops(&RSVP_OPS);
661 }
662
/* Hook the entry and exit points into the module loader. */
module_init(init_rsvp);
module_exit(exit_rsvp);