[PKT_SCHED]: Fix dsmark to apply changes consistent
[safe/jmp/linux-2.6] / net / sched / sch_dsmark.c
1 /* net/sched/sch_dsmark.c - Differentiated Services field marker */
2
3 /* Written 1998-2000 by Werner Almesberger, EPFL ICA */
4
5
6 #include <linux/config.h>
7 #include <linux/module.h>
8 #include <linux/init.h>
9 #include <linux/types.h>
10 #include <linux/string.h>
11 #include <linux/errno.h>
12 #include <linux/skbuff.h>
13 #include <linux/netdevice.h> /* for pkt_sched */
14 #include <linux/rtnetlink.h>
15 #include <net/pkt_sched.h>
16 #include <net/dsfield.h>
17 #include <net/inet_ecn.h>
18 #include <asm/byteorder.h>
19
20
/*
 * Control-path tracing.  Change "#if 0" to "#if 1" to have DPRINTK emit
 * KERN_DEBUG messages; otherwise it expands to nothing and its arguments
 * are never evaluated.
 */
#if 0
#define DPRINTK(fmt, args...)	printk(KERN_DEBUG fmt, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

/* Data-path (per-packet) tracing, switched independently of DPRINTK. */
#if 0
#define D2PRINTK(fmt, args...)	printk(KERN_DEBUG fmt, ##args)
#else
#define D2PRINTK(fmt, args...)
#endif


/* Shorthand for fetching the private data of a qdisc. */
#define PRIV(sch)	qdisc_priv(sch)
35
36
37 /*
38  * classid      class           marking
39  * -------      -----           -------
40  *   n/a          0             n/a
41  *   x:0          1             use entry [0]
42  *   ...         ...            ...
43  *   x:y y>0     y+1            use entry [y]
44  *   ...         ...            ...
45  * x:indices-1  indices         use entry [indices-1]
46  *   ...         ...            ...
47  *   x:y         y+1            use entry [y & (indices-1)]
48  *   ...         ...            ...
49  * 0xffff       0x10000         use entry [indices-1]
50  */
51
52
53 #define NO_DEFAULT_INDEX        (1 << 16)
54
55 struct dsmark_qdisc_data {
56         struct Qdisc            *q;
57         struct tcf_proto        *filter_list;
58         __u8                    *mask;  /* "owns" the array */
59         __u8                    *value;
60         __u16                   indices;
61         __u32                   default_index;  /* index range is 0...0xffff */
62         int                     set_tc_index;
63 };
64
/*
 * Check that a requested table size is usable: it must be a non-zero power
 * of two, because dequeue reduces tc_index with "& (indices - 1)".
 *
 * Returns 1 if @indices is valid, 0 otherwise.  Zero is rejected here
 * directly (a shift-until-one loop would never terminate for it).
 */
static inline int dsmark_valid_indices(u16 indices)
{
	/* A power of two has exactly one bit set, so n & (n - 1) clears it. */
	return indices != 0 && (indices & (indices - 1)) == 0;
}
75
76 static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
77 {
78         return (index <= p->indices && index > 0);
79 }
80
81 /* ------------------------- Class/flow operations ------------------------- */
82
83
84 static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
85     struct Qdisc *new,struct Qdisc **old)
86 {
87         struct dsmark_qdisc_data *p = PRIV(sch);
88
89         DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
90             old);
91
92         if (new == NULL) {
93                 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
94                 if (new == NULL)
95                         new = &noop_qdisc;
96         }
97
98         sch_tree_lock(sch);
99         *old = xchg(&p->q,new);
100         if (*old)
101                 qdisc_reset(*old);
102         sch->q.qlen = 0;
103         sch_tree_unlock(sch); /* @@@ move up ? */
104         return 0;
105 }
106
107
108 static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
109 {
110         struct dsmark_qdisc_data *p = PRIV(sch);
111
112         return p->q;
113 }
114
115
116 static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
117 {
118         struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch);
119
120         DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
121         return TC_H_MIN(classid)+1;
122 }
123
124
125 static unsigned long dsmark_bind_filter(struct Qdisc *sch,
126     unsigned long parent, u32 classid)
127 {
128         return dsmark_get(sch,classid);
129 }
130
131
/* Release a class handle: nothing to do, dsmark_get takes no reference. */
static void dsmark_put(struct Qdisc *sch, unsigned long cl)
{
	return;
}
135
136
137 static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
138                          struct rtattr **tca, unsigned long *arg)
139 {
140         struct dsmark_qdisc_data *p = PRIV(sch);
141         struct rtattr *opt = tca[TCA_OPTIONS-1];
142         struct rtattr *tb[TCA_DSMARK_MAX];
143         int err = -EINVAL;
144         u8 mask = 0;
145
146         DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
147                 "arg 0x%lx\n", sch, p, classid, parent, *arg);
148
149         if (!dsmark_valid_index(p, *arg)) {
150                 err = -ENOENT;
151                 goto rtattr_failure;
152         }
153
154         if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
155                 goto rtattr_failure;
156
157         if (tb[TCA_DSMARK_MASK-1])
158                 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
159
160         if (tb[TCA_DSMARK_VALUE-1])
161                 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
162                 
163         if (tb[TCA_DSMARK_MASK-1])
164                 p->mask[*arg-1] = mask;
165
166         err = 0;
167
168 rtattr_failure:
169         return err;
170 }
171
172 static int dsmark_delete(struct Qdisc *sch,unsigned long arg)
173 {
174         struct dsmark_qdisc_data *p = PRIV(sch);
175
176         if (!arg || arg > p->indices)
177                 return -EINVAL;
178         p->mask[arg-1] = 0xff;
179         p->value[arg-1] = 0;
180         return 0;
181 }
182
183
184 static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
185 {
186         struct dsmark_qdisc_data *p = PRIV(sch);
187         int i;
188
189         DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
190         if (walker->stop)
191                 return;
192         for (i = 0; i < p->indices; i++) {
193                 if (p->mask[i] == 0xff && !p->value[i])
194                         goto ignore;
195                 if (walker->count >= walker->skip) {
196                         if (walker->fn(sch, i+1, walker) < 0) {
197                                 walker->stop = 1;
198                                 break;
199                         }
200                 }
201 ignore:         
202                 walker->count++;
203         }
204 }
205
206
207 static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
208 {
209         struct dsmark_qdisc_data *p = PRIV(sch);
210
211         return &p->filter_list;
212 }
213
214
215 /* --------------------------- Qdisc operations ---------------------------- */
216
217
218 static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
219 {
220         struct dsmark_qdisc_data *p = PRIV(sch);
221         struct tcf_result res;
222         int result;
223         int ret = NET_XMIT_POLICED;
224
225         D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
226         if (p->set_tc_index) {
227                 /* FIXME: Safe with non-linear skbs? --RR */
228                 switch (skb->protocol) {
229                         case __constant_htons(ETH_P_IP):
230                                 skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
231                                         & ~INET_ECN_MASK;
232                                 break;
233                         case __constant_htons(ETH_P_IPV6):
234                                 skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
235                                         & ~INET_ECN_MASK;
236                                 break;
237                         default:
238                                 skb->tc_index = 0;
239                                 break;
240                 };
241         }
242         result = TC_POLICE_OK; /* be nice to gcc */
243         if (TC_H_MAJ(skb->priority) == sch->handle) {
244                 skb->tc_index = TC_H_MIN(skb->priority);
245         } else {
246                 result = tc_classify(skb,p->filter_list,&res);
247                 D2PRINTK("result %d class 0x%04x\n",result,res.classid);
248                 switch (result) {
249 #ifdef CONFIG_NET_CLS_POLICE
250                         case TC_POLICE_SHOT:
251                                 kfree_skb(skb);
252                                 break;
253 #if 0
254                         case TC_POLICE_RECLASSIFY:
255                                 /* FIXME: what to do here ??? */
256 #endif
257 #endif
258                         case TC_POLICE_OK:
259                                 skb->tc_index = TC_H_MIN(res.classid);
260                                 break;
261                         case TC_POLICE_UNSPEC:
262                                 /* fall through */
263                         default:
264                                 if (p->default_index != NO_DEFAULT_INDEX)
265                                         skb->tc_index = p->default_index;
266                                 break;
267                 };
268         }
269         if (
270 #ifdef CONFIG_NET_CLS_POLICE
271             result == TC_POLICE_SHOT ||
272 #endif
273
274             ((ret = p->q->enqueue(skb,p->q)) != 0)) {
275                 sch->qstats.drops++;
276                 return ret;
277         }
278         sch->bstats.bytes += skb->len;
279         sch->bstats.packets++;
280         sch->q.qlen++;
281         return ret;
282 }
283
284
285 static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
286 {
287         struct dsmark_qdisc_data *p = PRIV(sch);
288         struct sk_buff *skb;
289         int index;
290
291         D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
292         skb = p->q->ops->dequeue(p->q);
293         if (!skb)
294                 return NULL;
295         sch->q.qlen--;
296         index = skb->tc_index & (p->indices-1);
297         D2PRINTK("index %d->%d\n",skb->tc_index,index);
298         switch (skb->protocol) {
299                 case __constant_htons(ETH_P_IP):
300                         ipv4_change_dsfield(skb->nh.iph,
301                             p->mask[index],p->value[index]);
302                         break;
303                 case __constant_htons(ETH_P_IPV6):
304                         ipv6_change_dsfield(skb->nh.ipv6h,
305                             p->mask[index],p->value[index]);
306                         break;
307                 default:
308                         /*
309                          * Only complain if a change was actually attempted.
310                          * This way, we can send non-IP traffic through dsmark
311                          * and don't need yet another qdisc as a bypass.
312                          */
313                         if (p->mask[index] != 0xff || p->value[index])
314                                 printk(KERN_WARNING "dsmark_dequeue: "
315                                        "unsupported protocol %d\n",
316                                        htons(skb->protocol));
317                         break;
318         };
319         return skb;
320 }
321
322
323 static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
324 {
325         int ret;
326         struct dsmark_qdisc_data *p = PRIV(sch);
327
328         D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
329         if ((ret = p->q->ops->requeue(skb, p->q)) == 0) {
330                 sch->q.qlen++;
331                 sch->qstats.requeues++;
332                 return 0;
333         }
334         sch->qstats.drops++;
335         return ret;
336 }
337
338
339 static unsigned int dsmark_drop(struct Qdisc *sch)
340 {
341         struct dsmark_qdisc_data *p = PRIV(sch);
342         unsigned int len;
343         
344         DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
345         if (!p->q->ops->drop)
346                 return 0;
347         if (!(len = p->q->ops->drop(p->q)))
348                 return 0;
349         sch->q.qlen--;
350         return len;
351 }
352
353
354 static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
355 {
356         struct dsmark_qdisc_data *p = PRIV(sch);
357         struct rtattr *tb[TCA_DSMARK_MAX];
358         int err = -EINVAL;
359         u32 default_index = NO_DEFAULT_INDEX;
360         u16 indices;
361         u8 *mask;
362
363         DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
364
365         if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
366                 goto errout;
367
368         indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
369         if (!indices || !dsmark_valid_indices(indices))
370                 goto errout;
371
372         if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
373                 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
374
375         mask = kmalloc(indices * 2, GFP_KERNEL);
376         if (mask == NULL) {
377                 err = -ENOMEM;
378                 goto errout;
379         }
380
381         p->mask = mask;
382         memset(p->mask, 0xff, indices);
383
384         p->value = p->mask + indices;
385         memset(p->value, 0, indices);
386
387         p->indices = indices;
388         p->default_index = default_index;
389         p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
390
391         p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
392         if (p->q == NULL)
393                 p->q = &noop_qdisc;
394
395         DPRINTK("dsmark_init: qdisc %p\n", p->q);
396
397         err = 0;
398 errout:
399 rtattr_failure:
400         return err;
401 }
402
403
404 static void dsmark_reset(struct Qdisc *sch)
405 {
406         struct dsmark_qdisc_data *p = PRIV(sch);
407
408         DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
409         qdisc_reset(p->q);
410         sch->q.qlen = 0;
411 }
412
413
414 static void dsmark_destroy(struct Qdisc *sch)
415 {
416         struct dsmark_qdisc_data *p = PRIV(sch);
417         struct tcf_proto *tp;
418
419         DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p);
420         while (p->filter_list) {
421                 tp = p->filter_list;
422                 p->filter_list = tp->next;
423                 tcf_destroy(tp);
424         }
425         qdisc_destroy(p->q);
426         kfree(p->mask);
427 }
428
429
430 static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
431     struct sk_buff *skb, struct tcmsg *tcm)
432 {
433         struct dsmark_qdisc_data *p = PRIV(sch);
434         unsigned char *b = skb->tail;
435         struct rtattr *rta;
436
437         DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl);
438         if (!cl || cl > p->indices)
439                 return -EINVAL;
440         tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1);
441         rta = (struct rtattr *) b;
442         RTA_PUT(skb,TCA_OPTIONS,0,NULL);
443         RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]);
444         RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]);
445         rta->rta_len = skb->tail-b;
446         return skb->len;
447
448 rtattr_failure:
449         skb_trim(skb,b-skb->data);
450         return -1;
451 }
452
453 static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
454 {
455         struct dsmark_qdisc_data *p = PRIV(sch);
456         unsigned char *b = skb->tail;
457         struct rtattr *rta;
458
459         rta = (struct rtattr *) b;
460         RTA_PUT(skb,TCA_OPTIONS,0,NULL);
461         RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices);
462         if (p->default_index != NO_DEFAULT_INDEX) {
463                 __u16 tmp = p->default_index;
464
465                 RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
466         }
467         if (p->set_tc_index)
468                 RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL);
469         rta->rta_len = skb->tail-b;
470         return skb->len;
471
472 rtattr_failure:
473         skb_trim(skb,b-skb->data);
474         return -1;
475 }
476
477 static struct Qdisc_class_ops dsmark_class_ops = {
478         .graft          =       dsmark_graft,
479         .leaf           =       dsmark_leaf,
480         .get            =       dsmark_get,
481         .put            =       dsmark_put,
482         .change         =       dsmark_change,
483         .delete         =       dsmark_delete,
484         .walk           =       dsmark_walk,
485         .tcf_chain      =       dsmark_find_tcf,
486         .bind_tcf       =       dsmark_bind_filter,
487         .unbind_tcf     =       dsmark_put,
488         .dump           =       dsmark_dump_class,
489 };
490
491 static struct Qdisc_ops dsmark_qdisc_ops = {
492         .next           =       NULL,
493         .cl_ops         =       &dsmark_class_ops,
494         .id             =       "dsmark",
495         .priv_size      =       sizeof(struct dsmark_qdisc_data),
496         .enqueue        =       dsmark_enqueue,
497         .dequeue        =       dsmark_dequeue,
498         .requeue        =       dsmark_requeue,
499         .drop           =       dsmark_drop,
500         .init           =       dsmark_init,
501         .reset          =       dsmark_reset,
502         .destroy        =       dsmark_destroy,
503         .change         =       NULL,
504         .dump           =       dsmark_dump,
505         .owner          =       THIS_MODULE,
506 };
507
508 static int __init dsmark_module_init(void)
509 {
510         return register_qdisc(&dsmark_qdisc_ops);
511 }
512 static void __exit dsmark_module_exit(void) 
513 {
514         unregister_qdisc(&dsmark_qdisc_ops);
515 }
516 module_init(dsmark_module_init)
517 module_exit(dsmark_module_exit)
518 MODULE_LICENSE("GPL");