[NET_SCHED]: cls_flow: fix key mask validity check
/*
 * net/sched/cls_flow.c         Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
        struct list_head        filters;
};

struct flow_filter {
        struct list_head        list;
        struct tcf_exts         exts;
        struct tcf_ematch_tree  ematches;
        u32                     handle;

        u32                     nkeys;
        u32                     keymask;
        u32                     mode;
        u32                     mask;
        u32                     xor;
        u32                     rshift;
        u32                     addend;
        u32                     divisor;
        u32                     baseclass;
};

static u32 flow_hashrnd __read_mostly;
static int flow_hashrnd_initted __read_mostly;

static const struct tcf_ext_map flow_ext_map = {
        .action = TCA_FLOW_ACT,
        .police = TCA_FLOW_POLICE,
};

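/*
 * Fold a kernel pointer into a 32 bit value usable as a hash input.
 * On 64 bit both halves of the pointer contribute to the result.
 */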
static inline u32 addr_fold(void *addr)
{
        unsigned long a = (unsigned long)addr;

        return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}

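/*
 * Address keys: IPv4 addresses are used as-is, for IPv6 only the last
 * 32 bits of the address are used. Other protocols fall back to
 * folding the socket or dst entry pointer, which still keeps packets
 * of the same flow together.
 */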
static u32 flow_get_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->saddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
        default:
                return addr_fold(skb->sk);
        }
}

static u32 flow_get_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->daddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
        default:
                return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }
}

static u32 flow_get_proto(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ip_hdr(skb)->protocol;
        case __constant_htons(ETH_P_IPV6):
                return ipv6_hdr(skb)->nexthdr;
        default:
                return 0;
        }
}

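/*
 * Protocols carrying 16 bit port numbers (or, in the case of ESP, the
 * SPI) directly after the IP header.
 */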
static int has_ports(u8 protocol)
{
        switch (protocol) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_UDPLITE:
        case IPPROTO_SCTP:
        case IPPROTO_DCCP:
        case IPPROTO_ESP:
                return 1;
        default:
                return 0;
        }
}

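/*
 * Port keys: read directly from the transport header. IPv4 packets
 * with MF or a nonzero fragment offset are skipped; for IPv6 the
 * transport header is assumed to directly follow the fixed header
 * (extension headers are not skipped).
 */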
static u32 flow_get_proto_src(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)&iph[1]);
                break;
        }
        default:
                res = addr_fold(skb->sk);
        }

        return res;
}

static u32 flow_get_proto_dst(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
                break;
        }
        default:
                res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }

        return res;
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
        return skb->iif;
}

static u32 flow_get_priority(const struct sk_buff *skb)
{
        return skb->priority;
}

static u32 flow_get_mark(const struct sk_buff *skb)
{
        return skb->mark;
}

static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        return addr_fold(skb->nfct);
#else
        return 0;
#endif
}

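/*
 * CTTUPLE() is a statement expression evaluating to the given member
 * of the conntrack tuple for the packet's direction. When no
 * conntrack entry is attached (or conntrack support is compiled out)
 * it jumps to a "fallback" label, which each caller must provide.
 */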
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        enum ip_conntrack_info ctinfo;                                  \
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);                   \
        if (ct == NULL)                                                 \
                goto fallback;                                          \
        ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;                 \
})
#else
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        goto fallback;                                                  \
        0;                                                              \
})
#endif

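/*
 * The NFCT based keys read the conntrack tuple for the packet's
 * direction instead of the packet headers; without a conntrack entry
 * they fall back to the header based extraction above.
 */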
static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, src.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
        }
fallback:
        return flow_get_src(skb);
}

static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, dst.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
        }
fallback:
        return flow_get_dst(skb);
}

static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, src.u.all));
fallback:
        return flow_get_proto_src(skb);
}

static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
        return flow_get_proto_dst(skb);
}

static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ROUTE
        if (skb->dst)
                return skb->dst->tclassid;
#endif
        return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_uid;
        return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_gid;
        return 0;
}

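/* Map a FLOW_KEY_* value to the matching extraction helper. */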
static u32 flow_key_get(const struct sk_buff *skb, int key)
{
        switch (key) {
        case FLOW_KEY_SRC:
                return flow_get_src(skb);
        case FLOW_KEY_DST:
                return flow_get_dst(skb);
        case FLOW_KEY_PROTO:
                return flow_get_proto(skb);
        case FLOW_KEY_PROTO_SRC:
                return flow_get_proto_src(skb);
        case FLOW_KEY_PROTO_DST:
                return flow_get_proto_dst(skb);
        case FLOW_KEY_IIF:
                return flow_get_iif(skb);
        case FLOW_KEY_PRIORITY:
                return flow_get_priority(skb);
        case FLOW_KEY_MARK:
                return flow_get_mark(skb);
        case FLOW_KEY_NFCT:
                return flow_get_nfct(skb);
        case FLOW_KEY_NFCT_SRC:
                return flow_get_nfct_src(skb);
        case FLOW_KEY_NFCT_DST:
                return flow_get_nfct_dst(skb);
        case FLOW_KEY_NFCT_PROTO_SRC:
                return flow_get_nfct_proto_src(skb);
        case FLOW_KEY_NFCT_PROTO_DST:
                return flow_get_nfct_proto_dst(skb);
        case FLOW_KEY_RTCLASSID:
                return flow_get_rtclassid(skb);
        case FLOW_KEY_SKUID:
                return flow_get_skuid(skb);
        case FLOW_KEY_SKGID:
                return flow_get_skgid(skb);
        default:
                WARN_ON(1);
                return 0;
        }
}

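/*
 * For each filter whose ematches accept the packet, extract the
 * configured keys in ascending FLOW_KEY_* order and either jhash them
 * (FLOW_MODE_HASH) or apply the mask/xor/rshift/addend transforms to
 * the single key (FLOW_MODE_MAP). The result, optionally folded
 * modulo the divisor, is added to the base class to form the class
 * ID. A negative result from the extensions (e.g. a policer) causes
 * the next filter to be tried.
 */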
static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
                         struct tcf_result *res)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        u32 keymask;
        u32 classid;
        unsigned int n, key;
        int r;

        list_for_each_entry(f, &head->filters, list) {
                u32 keys[f->nkeys];

                if (!tcf_em_tree_match(skb, &f->ematches, NULL))
                        continue;

                keymask = f->keymask;

                for (n = 0; n < f->nkeys; n++) {
                        key = ffs(keymask) - 1;
                        keymask &= ~(1 << key);
                        keys[n] = flow_key_get(skb, key);
                }

                if (f->mode == FLOW_MODE_HASH)
                        classid = jhash2(keys, f->nkeys, flow_hashrnd);
                else {
                        classid = keys[0];
                        classid = (classid & f->mask) ^ f->xor;
                        classid = (classid >> f->rshift) + f->addend;
                }

                if (f->divisor)
                        classid %= f->divisor;

                res->class   = 0;
                res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

                r = tcf_exts_exec(skb, &f->exts, res);
                if (r < 0)
                        continue;
                return r;
        }
        return -1;
}

static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
        [TCA_FLOW_KEYS]         = { .type = NLA_U32 },
        [TCA_FLOW_MODE]         = { .type = NLA_U32 },
        [TCA_FLOW_BASECLASS]    = { .type = NLA_U32 },
        [TCA_FLOW_RSHIFT]       = { .type = NLA_U32 },
        [TCA_FLOW_ADDEND]       = { .type = NLA_U32 },
        [TCA_FLOW_MASK]         = { .type = NLA_U32 },
        [TCA_FLOW_XOR]          = { .type = NLA_U32 },
        [TCA_FLOW_DIVISOR]      = { .type = NLA_U32 },
        [TCA_FLOW_ACT]          = { .type = NLA_NESTED },
        [TCA_FLOW_POLICE]       = { .type = NLA_NESTED },
        [TCA_FLOW_EMATCHES]     = { .type = NLA_NESTED },
};

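/*
 * Create a new filter or change an existing one. A new filter must
 * supply a handle and a key mask; on a change, attributes that are
 * not supplied keep their previous values.
 */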
static int flow_change(struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
                       unsigned long *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_FLOW_MAX + 1];
        struct tcf_exts e;
        struct tcf_ematch_tree t;
        unsigned int nkeys = 0;
        u32 baseclass = 0;
        u32 keymask = 0;
        u32 mode;
        int err;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
        if (err < 0)
                return err;

        if (tb[TCA_FLOW_BASECLASS]) {
                baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
                if (TC_H_MIN(baseclass) == 0)
                        return -EINVAL;
        }

        if (tb[TCA_FLOW_KEYS]) {
                keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

                nkeys = hweight32(keymask);
                if (nkeys == 0)
                        return -EINVAL;

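                /*
                 * Reject masks containing keys unknown to this kernel:
                 * fls(keymask) - 1 is the index of the highest bit set,
                 * i.e. the highest requested key.
                 */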
                if (fls(keymask) - 1 > FLOW_KEY_MAX)
                        return -EOPNOTSUPP;
        }

        err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
        if (err < 0)
                return err;

        err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
        if (err < 0)
                goto err1;

        f = (struct flow_filter *)*arg;
        if (f != NULL) {
                err = -EINVAL;
                if (f->handle != handle && handle)
                        goto err2;

                mode = f->mode;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;
        } else {
                err = -EINVAL;
                if (!handle)
                        goto err2;
                if (!tb[TCA_FLOW_KEYS])
                        goto err2;

                mode = FLOW_MODE_MAP;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;

                if (TC_H_MAJ(baseclass) == 0)
                        baseclass = TC_H_MAKE(tp->q->handle, baseclass);
                if (TC_H_MIN(baseclass) == 0)
                        baseclass = TC_H_MAKE(baseclass, 1);

                err = -ENOBUFS;
                f = kzalloc(sizeof(*f), GFP_KERNEL);
                if (f == NULL)
                        goto err2;

                f->handle = handle;
                f->mask   = ~0U;
        }

        tcf_exts_change(tp, &f->exts, &e);
        tcf_em_tree_change(tp, &f->ematches, &t);

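        /*
         * Updates of the live filter are done under the tree lock to
         * serialize against the packet classification path.
         */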
        tcf_tree_lock(tp);

        if (tb[TCA_FLOW_KEYS]) {
                f->keymask = keymask;
                f->nkeys   = nkeys;
        }

        f->mode = mode;

        if (tb[TCA_FLOW_MASK])
                f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
        if (tb[TCA_FLOW_XOR])
                f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
        if (tb[TCA_FLOW_RSHIFT])
                f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
        if (tb[TCA_FLOW_ADDEND])
                f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);

        if (tb[TCA_FLOW_DIVISOR])
                f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
        if (baseclass)
                f->baseclass = baseclass;

        if (*arg == 0)
                list_add_tail(&f->list, &head->filters);

        tcf_tree_unlock(tp);

        *arg = (unsigned long)f;
        return 0;

err2:
        tcf_em_tree_destroy(tp, &t);
err1:
        tcf_exts_destroy(tp, &e);
        return err;
}

static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
        tcf_exts_destroy(tp, &f->exts);
        tcf_em_tree_destroy(tp, &f->ematches);
        kfree(f);
}

static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct flow_filter *f = (struct flow_filter *)arg;

        tcf_tree_lock(tp);
        list_del(&f->list);
        tcf_tree_unlock(tp);
        flow_destroy_filter(tp, f);
        return 0;
}

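/*
 * The jhash seed shared by all flow classifier instances is
 * initialized lazily, when the first instance is created.
 */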
static int flow_init(struct tcf_proto *tp)
{
        struct flow_head *head;

        if (!flow_hashrnd_initted) {
                get_random_bytes(&flow_hashrnd, sizeof(flow_hashrnd));
                flow_hashrnd_initted = 1;
        }

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;
        INIT_LIST_HEAD(&head->filters);
        tp->root = head;
        return 0;
}

static void flow_destroy(struct tcf_proto *tp)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f, *next;

        list_for_each_entry_safe(f, next, &head->filters, list) {
                list_del(&f->list);
                flow_destroy_filter(tp, f);
        }
        kfree(head);
}

static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list)
                if (f->handle == handle)
                        return (unsigned long)f;
        return 0;
}

static void flow_put(struct tcf_proto *tp, unsigned long f)
{
        return;
}

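/*
 * Dump the filter configuration; attributes still at their default
 * values are omitted.
 */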
static int flow_dump(struct tcf_proto *tp, unsigned long fh,
                     struct sk_buff *skb, struct tcmsg *t)
{
        struct flow_filter *f = (struct flow_filter *)fh;
        struct nlattr *nest;

        if (f == NULL)
                return skb->len;

        t->tcm_handle = f->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
        NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);

        if (f->mask != ~0 || f->xor != 0) {
                NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
                NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
        }
        if (f->rshift)
                NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
        if (f->addend)
                NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);

        if (f->divisor)
                NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
        if (f->baseclass)
                NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);

        if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
        if (f->ematches.hdr.nmatches &&
            tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
                goto nla_put_failure;
#endif
        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, nest);
        return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long)f, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_flow_ops __read_mostly = {
        .kind           = "flow",
        .classify       = flow_classify,
        .init           = flow_init,
        .destroy        = flow_destroy,
        .change         = flow_change,
        .delete         = flow_delete,
        .get            = flow_get,
        .put            = flow_put,
        .dump           = flow_dump,
        .walk           = flow_walk,
        .owner          = THIS_MODULE,
};

static int __init cls_flow_init(void)
{
        return register_tcf_proto_ops(&cls_flow_ops);
}

static void __exit cls_flow_exit(void)
{
        unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");