1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
21
22
23 /*      Class-Based Queueing (CBQ) algorithm.
24         =======================================
25
26         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27                  Management Models for Packet Networks",
28                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33                  Parameters", 1996
34
35                  [4] Sally Floyd and Michael Speer, "Experimental Results
36                  for Class-Based Queueing", 1998, not published.
37
38         -----------------------------------------------------------------------
39
40         The algorithm skeleton was taken from the NS simulator's cbq.cc.
41         If someone wants to check this code against the LBL version,
42         they should take into account that ONLY the skeleton was borrowed;
43         the implementation is different. In particular:
44
45         --- The WRR algorithm is different. Our version looks more
46         reasonable (I hope) and works when quanta are allowed to be
47         less than the MTU, which is always the case when real-time classes
48         have small rates. Note that the statement of [3] is
49         incomplete: delay may actually be estimated even if the class
50         per-round allotment is less than the MTU. Namely, if per-round
51         allotment is W*r_i, and r_1+...+r_k = r < 1
52
53         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55         In the worst case we have IntServ estimate with D = W*r+k*MTU
56         and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60         interpret some places, which look like wrong translations
61         from NS. Anyone is advised to find these differences
62         and explain to me why I am wrong 8).
63
64         --- Linux has no EOI event, so we cannot estimate the true class
65         idle time. The workaround is to treat the next dequeue event
66         as a sign that the previous packet has finished. This is wrong because of
67         internal device queueing, but on a permanently loaded link it is true.
68         Moreover, combined with clock integrator, this scheme looks
69         very close to an ideal solution.  */
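
/* Editorial worked example of the delay bound above (illustrative only;
   it assumes the allotments W*r_i are expressed in bytes, B in bytes/sec,
   and reads [x] as a ceiling): take MTU = 1500 bytes, B = 1.25e6 bytes/sec
   (10 Mbit/s), k = 2 classes each with r_i = 0.25 (so r = 0.5), and W
   chosen so that the per-round allotment W*r_i = 1000 bytes, i.e. smaller
   than the MTU. Then

        delay_i <= ([1500/1000]*2000 + 2000 + 2*1500)/1.25e6
                 = (4000 + 2000 + 3000)/1.25e6 ~= 7.2 ms

   so a finite delay bound is obtained even with quanta below the MTU. */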
70
71 struct cbq_sched_data;
72
73
74 struct cbq_class
75 {
76         struct cbq_class        *next;          /* hash table link */
77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
78
79 /* Parameters */
80         u32                     classid;
81         unsigned char           priority;       /* class priority */
82         unsigned char           priority2;      /* priority to be used after overlimit */
83         unsigned char           ewma_log;       /* time constant for idle time calculation */
84         unsigned char           ovl_strategy;
85 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
86         unsigned char           police;
87 #endif
88
89         u32                     defmap;
90
91         /* Link-sharing scheduler parameters */
92         long                    maxidle;        /* Class parameters: see below. */
93         long                    offtime;
94         long                    minidle;
95         u32                     avpkt;
96         struct qdisc_rate_table *R_tab;
97
98         /* Overlimit strategy parameters */
99         void                    (*overlimit)(struct cbq_class *cl);
100         psched_tdiff_t          penalty;
101
102         /* General scheduler (WRR) parameters */
103         long                    allot;
104         long                    quantum;        /* Allotment per WRR round */
105         long                    weight;         /* Relative allotment: see below */
106
107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
108         struct cbq_class        *split;         /* Ptr to split node */
109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
112                                                    parent otherwise */
113         struct cbq_class        *sibling;       /* Sibling chain */
114         struct cbq_class        *children;      /* Pointer to children chain */
115
116         struct Qdisc            *q;             /* Elementary queueing discipline */
117
118
119 /* Variables */
120         unsigned char           cpriority;      /* Effective priority */
121         unsigned char           delayed;
122         unsigned char           level;          /* level of the class in hierarchy:
123                                                    0 for leaf classes, and maximal
124                                                    level of children + 1 for nodes.
125                                                  */
126
127         psched_time_t           last;           /* Last end of service */
128         psched_time_t           undertime;
129         long                    avgidle;
130         long                    deficit;        /* Saved deficit for WRR */
131         psched_time_t           penalized;
132         struct gnet_stats_basic bstats;
133         struct gnet_stats_queue qstats;
134         struct gnet_stats_rate_est rate_est;
135         struct tc_cbq_xstats    xstats;
136
137         struct tcf_proto        *filter_list;
138
139         int                     refcnt;
140         int                     filters;
141
142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
143 };
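
/* Editorial summary of the pointer roles above: tparent/children/sibling
   form the configured class tree; borrow and share point up that tree and
   are used for bandwidth borrowing and link-sharing accounting; split and
   defaults[] implement the priority-based default classification used by
   cbq_classify() below. */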
144
145 struct cbq_sched_data
146 {
147         struct cbq_class        *classes[16];           /* Hash table of all classes */
148         int                     nclasses[TC_CBQ_MAXPRIO+1];
149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
150
151         struct cbq_class        link;
152
153         unsigned                activemask;
154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
155                                                                    with backlog */
156
157 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
158         struct cbq_class        *rx_class;
159 #endif
160         struct cbq_class        *tx_class;
161         struct cbq_class        *tx_borrowed;
162         int                     tx_len;
163         psched_time_t           now;            /* Cached timestamp */
164         psched_time_t           now_rt;         /* Cached real time */
165         unsigned                pmask;
166
167         struct hrtimer          delay_timer;
168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
169                                                    started when CBQ has
170                                                    backlog, but cannot
171                                                    transmit just now */
172         psched_tdiff_t          wd_expires;
173         int                     toplevel;
174         u32                     hgenerator;
175 };
176
177
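/* Editorial note: L2T ("length to time") looks up the transmission time of
   a len-byte packet at the class's configured rate. The rate table supplied
   by userspace is indexed in cells of 2^cell_log bytes, hence the shift. */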
178 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
179
180
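/* Fold a classid down to one of the 16 hash buckets. Illustrative example
   (assumed classid): 0x10002 ^ (0x10002>>8) = 0x10102,
   0x10102 ^ (0x10102>>4) = 0x11112, and 0x11112 & 0xF = 2. */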
181 static __inline__ unsigned cbq_hash(u32 h)
182 {
183         h ^= h>>8;
184         h ^= h>>4;
185         return h&0xF;
186 }
187
188 static __inline__ struct cbq_class *
189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
190 {
191         struct cbq_class *cl;
192
193         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194                 if (cl->classid == classid)
195                         return cl;
196         return NULL;
197 }
198
199 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
200
201 static struct cbq_class *
202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
203 {
204         struct cbq_class *cl, *new;
205
206         for (cl = this->tparent; cl; cl = cl->tparent)
207                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
208                         return new;
209
210         return NULL;
211 }
212
213 #endif
214
215 /* Classify packet. The procedure is pretty complicated, but
216    it allows us to combine link sharing and priority scheduling
217    transparently.
218
219    Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
220    so that it resolves to split nodes. Then packets are classified
221    by logical priority, or a more specific classifier may be attached
222    to the split node.
223  */
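
/* Editorial illustration of the split/defmap mechanism (assumed example;
   tc syntax of this era, not guaranteed verbatim):

       # Make 1:1 a split node: priorities 6 and 7 (mask c0) default to
       # class 1:3, the remaining priorities (mask 3f) to class 1:2.
       tc class change dev eth0 classid 1:3 cbq defmap c0 split 1:1
       tc class change dev eth0 classid 1:2 cbq defmap 3f split 1:1

   When no filter attached to the split node yields a leaf class, the code
   below falls back to the split node's defaults[] table (filled in from
   the defmaps), indexed by priority. */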
224
225 static struct cbq_class *
226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 {
228         struct cbq_sched_data *q = qdisc_priv(sch);
229         struct cbq_class *head = &q->link;
230         struct cbq_class **defmap;
231         struct cbq_class *cl = NULL;
232         u32 prio = skb->priority;
233         struct tcf_result res;
234
235         /*
236          *  Step 1. If skb->priority points to one of our classes, use it.
237          */
238         if (TC_H_MAJ(prio^sch->handle) == 0 &&
239             (cl = cbq_class_lookup(q, prio)) != NULL)
240                 return cl;
241
242         *qerr = NET_XMIT_BYPASS;
243         for (;;) {
244                 int result = 0;
245                 defmap = head->defaults;
246
247                 /*
248                  * Step 2+n. Apply classifier.
249                  */
250                 if (!head->filter_list ||
251                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
252                         goto fallback;
253
254                 if ((cl = (void*)res.class) == NULL) {
255                         if (TC_H_MAJ(res.classid))
256                                 cl = cbq_class_lookup(q, res.classid);
257                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
258                                 cl = defmap[TC_PRIO_BESTEFFORT];
259
260                         if (cl == NULL || cl->level >= head->level)
261                                 goto fallback;
262                 }
263
264 #ifdef CONFIG_NET_CLS_ACT
265                 switch (result) {
266                 case TC_ACT_QUEUED:
267                 case TC_ACT_STOLEN:
268                         *qerr = NET_XMIT_SUCCESS;
269                 case TC_ACT_SHOT:
270                         return NULL;
271                 case TC_ACT_RECLASSIFY:
272                         return cbq_reclassify(skb, cl);
273                 }
274 #elif defined(CONFIG_NET_CLS_POLICE)
275                 switch (result) {
276                 case TC_POLICE_RECLASSIFY:
277                         return cbq_reclassify(skb, cl);
278                 case TC_POLICE_SHOT:
279                         return NULL;
280                 default:
281                         break;
282                 }
283 #endif
284                 if (cl->level == 0)
285                         return cl;
286
287                 /*
288                  * Step 3+n. If the classifier selected a link-sharing class,
289                  *         apply the agency-specific classifier.
290                  *         Repeat this procedure until we hit a leaf node.
291                  */
292                 head = cl;
293         }
294
295 fallback:
296         cl = head;
297
298         /*
299          * Step 4. No success...
300          */
301         if (TC_H_MAJ(prio) == 0 &&
302             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
303             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
304                 return head;
305
306         return cl;
307 }
308
309 /*
310    A packet has just been enqueued to an empty class.
311    cbq_activate_class adds it to the tail of the active class list
312    of its priority band.
313  */
314
315 static __inline__ void cbq_activate_class(struct cbq_class *cl)
316 {
317         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
318         int prio = cl->cpriority;
319         struct cbq_class *cl_tail;
320
321         cl_tail = q->active[prio];
322         q->active[prio] = cl;
323
324         if (cl_tail != NULL) {
325                 cl->next_alive = cl_tail->next_alive;
326                 cl_tail->next_alive = cl;
327         } else {
328                 cl->next_alive = cl;
329                 q->activemask |= (1<<prio);
330         }
331 }
332
333 /*
334    Unlink the class from the active chain.
335    Note that this same procedure is done directly in cbq_dequeue*
336    during the round-robin procedure.
337  */
338
339 static void cbq_deactivate_class(struct cbq_class *this)
340 {
341         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
342         int prio = this->cpriority;
343         struct cbq_class *cl;
344         struct cbq_class *cl_prev = q->active[prio];
345
346         do {
347                 cl = cl_prev->next_alive;
348                 if (cl == this) {
349                         cl_prev->next_alive = cl->next_alive;
350                         cl->next_alive = NULL;
351
352                         if (cl == q->active[prio]) {
353                                 q->active[prio] = cl_prev;
354                                 if (cl == q->active[prio]) {
355                                         q->active[prio] = NULL;
356                                         q->activemask &= ~(1<<prio);
357                                         return;
358                                 }
359                         }
360                         return;
361                 }
362         } while ((cl_prev = cl) != q->active[prio]);
363 }
364
365 static void
366 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
367 {
368         int toplevel = q->toplevel;
369
370         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
371                 psched_time_t now;
372                 psched_tdiff_t incr;
373
374                 now = psched_get_time();
375                 incr = now - q->now_rt;
376                 now = q->now + incr;
377
378                 do {
379                         if (cl->undertime < now) {
380                                 q->toplevel = cl->level;
381                                 return;
382                         }
383                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
384         }
385 }
386
387 static int
388 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
389 {
390         struct cbq_sched_data *q = qdisc_priv(sch);
391         int len = skb->len;
392         int ret;
393         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
394
395 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
396         q->rx_class = cl;
397 #endif
398         if (cl == NULL) {
399                 if (ret == NET_XMIT_BYPASS)
400                         sch->qstats.drops++;
401                 kfree_skb(skb);
402                 return ret;
403         }
404
405 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
406         cl->q->__parent = sch;
407 #endif
408         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
409                 sch->q.qlen++;
410                 sch->bstats.packets++;
411                 sch->bstats.bytes+=len;
412                 cbq_mark_toplevel(q, cl);
413                 if (!cl->next_alive)
414                         cbq_activate_class(cl);
415                 return ret;
416         }
417
418         sch->qstats.drops++;
419         cbq_mark_toplevel(q, cl);
420         cl->qstats.drops++;
421         return ret;
422 }
423
424 static int
425 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
426 {
427         struct cbq_sched_data *q = qdisc_priv(sch);
428         struct cbq_class *cl;
429         int ret;
430
431         if ((cl = q->tx_class) == NULL) {
432                 kfree_skb(skb);
433                 sch->qstats.drops++;
434                 return NET_XMIT_CN;
435         }
436         q->tx_class = NULL;
437
438         cbq_mark_toplevel(q, cl);
439
440 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
441         q->rx_class = cl;
442         cl->q->__parent = sch;
443 #endif
444         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
445                 sch->q.qlen++;
446                 sch->qstats.requeues++;
447                 if (!cl->next_alive)
448                         cbq_activate_class(cl);
449                 return 0;
450         }
451         sch->qstats.drops++;
452         cl->qstats.drops++;
453         return ret;
454 }
455
456 /* Overlimit actions */
457
458 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
459
460 static void cbq_ovl_classic(struct cbq_class *cl)
461 {
462         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
463         psched_tdiff_t delay = cl->undertime - q->now;
464
465         if (!cl->delayed) {
466                 delay += cl->offtime;
467
468                 /*
469                    The class goes to sleep, so it will have no
470                    chance to work its avgidle back up. Let's forgive it 8)
471
472                    BTW cbq-2.0 is buggy in this
473                    place; apparently they forgot to shift it by cl->ewma_log.
474                  */
475                 if (cl->avgidle < 0)
476                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
477                 if (cl->avgidle < cl->minidle)
478                         cl->avgidle = cl->minidle;
479                 if (delay <= 0)
480                         delay = 1;
481                 cl->undertime = q->now + delay;
482
483                 cl->xstats.overactions++;
484                 cl->delayed = 1;
485         }
486         if (q->wd_expires == 0 || q->wd_expires > delay)
487                 q->wd_expires = delay;
488
489         /* Dirty work! We must schedule wakeups based on
490            real available rate, rather than leaf rate,
491            which may be tiny (even zero).
492          */
493         if (q->toplevel == TC_CBQ_MAXLEVEL) {
494                 struct cbq_class *b;
495                 psched_tdiff_t base_delay = q->wd_expires;
496
497                 for (b = cl->borrow; b; b = b->borrow) {
498                         delay = b->undertime - q->now;
499                         if (delay < base_delay) {
500                                 if (delay <= 0)
501                                         delay = 1;
502                                 base_delay = delay;
503                         }
504                 }
505
506                 q->wd_expires = base_delay;
507         }
508 }
509
510 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
511    when they go overlimit
512  */
513
514 static void cbq_ovl_rclassic(struct cbq_class *cl)
515 {
516         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
517         struct cbq_class *this = cl;
518
519         do {
520                 if (cl->level > q->toplevel) {
521                         cl = NULL;
522                         break;
523                 }
524         } while ((cl = cl->borrow) != NULL);
525
526         if (cl == NULL)
527                 cl = this;
528         cbq_ovl_classic(cl);
529 }
530
531 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
532
533 static void cbq_ovl_delay(struct cbq_class *cl)
534 {
535         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
536         psched_tdiff_t delay = cl->undertime - q->now;
537
538         if (!cl->delayed) {
539                 psched_time_t sched = q->now;
540                 ktime_t expires;
541
542                 delay += cl->offtime;
543                 if (cl->avgidle < 0)
544                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
545                 if (cl->avgidle < cl->minidle)
546                         cl->avgidle = cl->minidle;
547                 cl->undertime = q->now + delay;
548
549                 if (delay > 0) {
550                         sched += delay + cl->penalty;
551                         cl->penalized = sched;
552                         cl->cpriority = TC_CBQ_MAXPRIO;
553                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
554
555                         expires = ktime_set(0, 0);
556                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
557                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
558                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
559                                                   expires)) > 0)
560                                 q->delay_timer.expires = expires;
561                         hrtimer_restart(&q->delay_timer);
562                         cl->delayed = 1;
563                         cl->xstats.overactions++;
564                         return;
565                 }
566                 delay = 1;
567         }
568         if (q->wd_expires == 0 || q->wd_expires > delay)
569                 q->wd_expires = delay;
570 }
571
572 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573
574 static void cbq_ovl_lowprio(struct cbq_class *cl)
575 {
576         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
577
578         cl->penalized = q->now + cl->penalty;
579
580         if (cl->cpriority != cl->priority2) {
581                 cl->cpriority = cl->priority2;
582                 q->pmask |= (1<<cl->cpriority);
583                 cl->xstats.overactions++;
584         }
585         cbq_ovl_classic(cl);
586 }
587
588 /* TC_CBQ_OVL_DROP: penalize class by dropping */
589
590 static void cbq_ovl_drop(struct cbq_class *cl)
591 {
592         if (cl->q->ops->drop)
593                 if (cl->q->ops->drop(cl->q))
594                         cl->qdisc->q.qlen--;
595         cl->xstats.overactions++;
596         cbq_ovl_classic(cl);
597 }
598
599 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
600                                        psched_time_t now)
601 {
602         struct cbq_class *cl;
603         struct cbq_class *cl_prev = q->active[prio];
604         psched_time_t sched = now;
605
606         if (cl_prev == NULL)
607                 return 0;
608
609         do {
610                 cl = cl_prev->next_alive;
611                 if (now - cl->penalized > 0) {
612                         cl_prev->next_alive = cl->next_alive;
613                         cl->next_alive = NULL;
614                         cl->cpriority = cl->priority;
615                         cl->delayed = 0;
616                         cbq_activate_class(cl);
617
618                         if (cl == q->active[prio]) {
619                                 q->active[prio] = cl_prev;
620                                 if (cl == q->active[prio]) {
621                                         q->active[prio] = NULL;
622                                         return 0;
623                                 }
624                         }
625
626                         cl = cl_prev->next_alive;
627                 } else if (sched - cl->penalized > 0)
628                         sched = cl->penalized;
629         } while ((cl_prev = cl) != q->active[prio]);
630
631         return sched - now;
632 }
633
634 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
635 {
636         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
637                                                 delay_timer);
638         struct Qdisc *sch = q->watchdog.qdisc;
639         psched_time_t now;
640         psched_tdiff_t delay = 0;
641         unsigned pmask;
642
643         now = psched_get_time();
644
645         pmask = q->pmask;
646         q->pmask = 0;
647
648         while (pmask) {
649                 int prio = ffz(~pmask);
650                 psched_tdiff_t tmp;
651
652                 pmask &= ~(1<<prio);
653
654                 tmp = cbq_undelay_prio(q, prio, now);
655                 if (tmp > 0) {
656                         q->pmask |= 1<<prio;
657                         if (tmp < delay || delay == 0)
658                                 delay = tmp;
659                 }
660         }
661
662         if (delay) {
663                 ktime_t time;
664
665                 time = ktime_set(0, 0);
666                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
667                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
668         }
669
670         sch->flags &= ~TCQ_F_THROTTLED;
671         netif_schedule(sch->dev);
672         return HRTIMER_NORESTART;
673 }
674
675
676 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
677
678 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
679 {
680         int len = skb->len;
681         struct Qdisc *sch = child->__parent;
682         struct cbq_sched_data *q = qdisc_priv(sch);
683         struct cbq_class *cl = q->rx_class;
684
685         q->rx_class = NULL;
686
687         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688
689                 cbq_mark_toplevel(q, cl);
690
691                 q->rx_class = cl;
692                 cl->q->__parent = sch;
693
694                 if (cl->q->enqueue(skb, cl->q) == 0) {
695                         sch->q.qlen++;
696                         sch->bstats.packets++;
697                         sch->bstats.bytes+=len;
698                         if (!cl->next_alive)
699                                 cbq_activate_class(cl);
700                         return 0;
701                 }
702                 sch->qstats.drops++;
703                 return 0;
704         }
705
706         sch->qstats.drops++;
707         return -1;
708 }
709 #endif
710
711 /*
712    This is a mission-critical procedure.
713
714    We "regenerate" the toplevel cutoff if the transmitting class
715    has backlog and is not regulated. This is not part of the
716    original CBQ description, but it looks more reasonable.
717    It may well be wrong; the question needs further investigation.
718 */
719
720 static __inline__ void
721 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722                     struct cbq_class *borrowed)
723 {
724         if (cl && q->toplevel >= borrowed->level) {
725                 if (cl->q->q.qlen > 1) {
726                         do {
727                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
728                                         q->toplevel = borrowed->level;
729                                         return;
730                                 }
731                         } while ((borrowed=borrowed->borrow) != NULL);
732                 }
733 #if 0
734         /* It is not necessary now. Uncommenting it
735            will save CPU cycles, but decrease fairness.
736          */
737                 q->toplevel = TC_CBQ_MAXLEVEL;
738 #endif
739         }
740 }
741
742 static void
743 cbq_update(struct cbq_sched_data *q)
744 {
745         struct cbq_class *this = q->tx_class;
746         struct cbq_class *cl = this;
747         int len = q->tx_len;
748
749         q->tx_class = NULL;
750
751         for ( ; cl; cl = cl->share) {
752                 long avgidle = cl->avgidle;
753                 long idle;
754
755                 cl->bstats.packets++;
756                 cl->bstats.bytes += len;
757
758                 /*
759                    (now - last) is total time between packet right edges.
760                    (last_pktlen/rate) is "virtual" busy time, so that
761
762                          idle = (now - last) - last_pktlen/rate
763                  */
764
765                 idle = q->now - cl->last;
766                 if ((unsigned long)idle > 128*1024*1024) {
767                         avgidle = cl->maxidle;
768                 } else {
769                         idle -= L2T(cl, len);
770
771                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
773                    cl->avgidle == true_avgidle/W,
774                    hence:
775                  */
776                         avgidle += idle - (avgidle>>cl->ewma_log);
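                        /* Editorial numeric illustration (assumed values):
                           with ewma_log == 5 (W == 1/32), a stored scaled
                           avgidle of 3200 (true_avgidle == 100) and an idle
                           sample of 160, this yields 3200 + 160 - 100 = 3260,
                           i.e. true_avgidle becomes 3260/32 == 101.875,
                           matching (31/32)*100 + (1/32)*160. */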
777                 }
778
779                 if (avgidle <= 0) {
780                         /* Overlimit or at-limit */
781
782                         if (avgidle < cl->minidle)
783                                 avgidle = cl->minidle;
784
785                         cl->avgidle = avgidle;
786
787                         /* Calculate the expected time when this class
788                            will be allowed to send.
789                            It will occur when:
790                            (1-W)*true_avgidle + W*delay = 0, i.e.
791                            idle = (1/W - 1)*(-true_avgidle)
792                            or
793                            idle = (1 - W)*(-cl->avgidle);
794                          */
795                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
796
797                         /*
798                            That is not all.
799                            To maintain the rate allocated to the class,
800                            we add to undertime the virtual clock time
801                            needed to complete the transmitted packet.
802                            (len/phys_bandwidth has already elapsed by
803                            the time cbq_update is called.)
804                          */
805
806                         idle -= L2T(&q->link, len);
807                         idle += L2T(cl, len);
808
809                         cl->undertime = q->now + idle;
810                 } else {
811                         /* Underlimit */
812
813                         cl->undertime = PSCHED_PASTPERFECT;
814                         if (avgidle > cl->maxidle)
815                                 cl->avgidle = cl->maxidle;
816                         else
817                                 cl->avgidle = avgidle;
818                 }
819                 cl->last = q->now;
820         }
821
822         cbq_update_toplevel(q, this, q->tx_borrowed);
823 }
824
825 static __inline__ struct cbq_class *
826 cbq_under_limit(struct cbq_class *cl)
827 {
828         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
829         struct cbq_class *this_cl = cl;
830
831         if (cl->tparent == NULL)
832                 return cl;
833
834         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
835                 cl->delayed = 0;
836                 return cl;
837         }
838
839         do {
840                 /* This is a very suspicious place. Currently the overlimit
841                    action is generated for non-bounded classes
842                    only if the link is completely congested.
843                    Though this agrees with the ancestor-only paradigm,
844                    it looks very stupid. In particular,
845                    it means that this chunk of code will either
846                    never be called or result in strong amplification
847                    of burstiness. Dangerous and silly, but
848                    no other solution exists.
849                  */
850                 if ((cl = cl->borrow) == NULL) {
851                         this_cl->qstats.overlimits++;
852                         this_cl->overlimit(this_cl);
853                         return NULL;
854                 }
855                 if (cl->level > q->toplevel)
856                         return NULL;
857         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
858
859         cl->delayed = 0;
860         return cl;
861 }
862
863 static __inline__ struct sk_buff *
864 cbq_dequeue_prio(struct Qdisc *sch, int prio)
865 {
866         struct cbq_sched_data *q = qdisc_priv(sch);
867         struct cbq_class *cl_tail, *cl_prev, *cl;
868         struct sk_buff *skb;
869         int deficit;
870
871         cl_tail = cl_prev = q->active[prio];
872         cl = cl_prev->next_alive;
873
874         do {
875                 deficit = 0;
876
877                 /* Start round */
878                 do {
879                         struct cbq_class *borrow = cl;
880
881                         if (cl->q->q.qlen &&
882                             (borrow = cbq_under_limit(cl)) == NULL)
883                                 goto skip_class;
884
885                         if (cl->deficit <= 0) {
886                                 /* Class exhausted its allotment per
887                                    this round. Switch to the next one.
888                                  */
889                                 deficit = 1;
890                                 cl->deficit += cl->quantum;
891                                 goto next_class;
892                         }
893
894                         skb = cl->q->dequeue(cl->q);
895
896                         /* Class did not give us any skb :-(
897                            This can occur even if cl->q->q.qlen != 0,
898                            e.g. if cl->q == "tbf"
899                          */
900                         if (skb == NULL)
901                                 goto skip_class;
902
903                         cl->deficit -= skb->len;
904                         q->tx_class = cl;
905                         q->tx_borrowed = borrow;
906                         if (borrow != cl) {
907 #ifndef CBQ_XSTATS_BORROWS_BYTES
908                                 borrow->xstats.borrows++;
909                                 cl->xstats.borrows++;
910 #else
911                                 borrow->xstats.borrows += skb->len;
912                                 cl->xstats.borrows += skb->len;
913 #endif
914                         }
915                         q->tx_len = skb->len;
916
917                         if (cl->deficit <= 0) {
918                                 q->active[prio] = cl;
919                                 cl = cl->next_alive;
920                                 cl->deficit += cl->quantum;
921                         }
922                         return skb;
923
924 skip_class:
925                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
926                                 /* Class is empty or penalized.
927                                    Unlink it from active chain.
928                                  */
929                                 cl_prev->next_alive = cl->next_alive;
930                                 cl->next_alive = NULL;
931
932                                 /* Did cl_tail point to it? */
933                                 if (cl == cl_tail) {
934                                         /* Repair it! */
935                                         cl_tail = cl_prev;
936
937                                         /* Was it the last class in this band? */
938                                         if (cl == cl_tail) {
939                                                 /* Kill the band! */
940                                                 q->active[prio] = NULL;
941                                                 q->activemask &= ~(1<<prio);
942                                                 if (cl->q->q.qlen)
943                                                         cbq_activate_class(cl);
944                                                 return NULL;
945                                         }
946
947                                         q->active[prio] = cl_tail;
948                                 }
949                                 if (cl->q->q.qlen)
950                                         cbq_activate_class(cl);
951
952                                 cl = cl_prev;
953                         }
954
955 next_class:
956                         cl_prev = cl;
957                         cl = cl->next_alive;
958                 } while (cl_prev != cl_tail);
959         } while (deficit);
960
961         q->active[prio] = cl_prev;
962
963         return NULL;
964 }
965
966 static __inline__ struct sk_buff *
967 cbq_dequeue_1(struct Qdisc *sch)
968 {
969         struct cbq_sched_data *q = qdisc_priv(sch);
970         struct sk_buff *skb;
971         unsigned activemask;
972
973         activemask = q->activemask&0xFF;
974         while (activemask) {
975                 int prio = ffz(~activemask);
976                 activemask &= ~(1<<prio);
977                 skb = cbq_dequeue_prio(sch, prio);
978                 if (skb)
979                         return skb;
980         }
981         return NULL;
982 }
983
984 static struct sk_buff *
985 cbq_dequeue(struct Qdisc *sch)
986 {
987         struct sk_buff *skb;
988         struct cbq_sched_data *q = qdisc_priv(sch);
989         psched_time_t now;
990         psched_tdiff_t incr;
991
992         now = psched_get_time();
993         incr = now - q->now_rt;
994
995         if (q->tx_class) {
996                 psched_tdiff_t incr2;
997                 /* Time integrator. We calculate the EOS (end of service) time
998                    by adding the expected packet transmission time.
999                    If real time is greater, we warp the artificial clock,
1000                    so that:
1001
1002                    cbq_time = max(real_time, work);
1003                  */
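                /* Editorial note (assumed reading): incr2 below is the
                   expected service time of the previous packet; whatever
                   part of the wall-clock increment incr exceeds it is added
                   to q->now further down, so the virtual clock effectively
                   advances by max(incr, incr2) and never falls behind real
                   time. */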
1004                 incr2 = L2T(&q->link, q->tx_len);
1005                 q->now += incr2;
1006                 cbq_update(q);
1007                 if ((incr -= incr2) < 0)
1008                         incr = 0;
1009         }
1010         q->now += incr;
1011         q->now_rt = now;
1012
1013         for (;;) {
1014                 q->wd_expires = 0;
1015
1016                 skb = cbq_dequeue_1(sch);
1017                 if (skb) {
1018                         sch->q.qlen--;
1019                         sch->flags &= ~TCQ_F_THROTTLED;
1020                         return skb;
1021                 }
1022
1023                 /* All the classes are overlimit.
1024
1025                    This is possible if:
1026
1027                    1. Scheduler is empty.
1028                    2. Toplevel cutoff inhibited borrowing.
1029                    3. Root class is overlimit.
1030
1031                    Reset the 2nd and 3rd conditions and retry.
1032
1033                    Note that NS and cbq-2.0 are buggy here: peeking
1034                    an arbitrary class is appropriate for ancestor-only
1035                    sharing, but not for the toplevel algorithm.
1036
1037                    Our version is better but slower, because it requires
1038                    two passes; this is unavoidable with top-level sharing.
1039                 */
1040
1041                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1042                     q->link.undertime == PSCHED_PASTPERFECT)
1043                         break;
1044
1045                 q->toplevel = TC_CBQ_MAXLEVEL;
1046                 q->link.undertime = PSCHED_PASTPERFECT;
1047         }
1048
1049         /* No packets in scheduler or nobody wants to give them to us :-(
1050            Sigh... start the watchdog timer in the latter case. */
1051
1052         if (sch->q.qlen) {
1053                 sch->qstats.overlimits++;
1054                 if (q->wd_expires)
1055                         qdisc_watchdog_schedule(&q->watchdog,
1056                                                 now + q->wd_expires);
1057         }
1058         return NULL;
1059 }
1060
1061 /* CBQ class maintenance routines */
1062
1063 static void cbq_adjust_levels(struct cbq_class *this)
1064 {
1065         if (this == NULL)
1066                 return;
1067
1068         do {
1069                 int level = 0;
1070                 struct cbq_class *cl;
1071
1072                 if ((cl = this->children) != NULL) {
1073                         do {
1074                                 if (cl->level > level)
1075                                         level = cl->level;
1076                         } while ((cl = cl->sibling) != this->children);
1077                 }
1078                 this->level = level+1;
1079         } while ((this = this->tparent) != NULL);
1080 }
1081
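/* Editorial worked example for the normalization below (assumed numbers):
   with three classes of weights 1, 2 and 5 at this priority
   (q->quanta[prio] == 8, q->nclasses[prio] == 3), a class with weight 2
   and allot 1500 gets quantum = 2*1500*3/8 = 1125 bytes per WRR round. */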
1082 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1083 {
1084         struct cbq_class *cl;
1085         unsigned h;
1086
1087         if (q->quanta[prio] == 0)
1088                 return;
1089
1090         for (h=0; h<16; h++) {
1091                 for (cl = q->classes[h]; cl; cl = cl->next) {
1092                         /* BUGGGG... Beware! This expression suffers from
1093                            arithmetic overflow!
1094                          */
1095                         if (cl->priority == prio) {
1096                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1097                                         q->quanta[prio];
1098                         }
1099                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1100                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1101                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1102                         }
1103                 }
1104         }
1105 }
1106
1107 static void cbq_sync_defmap(struct cbq_class *cl)
1108 {
1109         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1110         struct cbq_class *split = cl->split;
1111         unsigned h;
1112         int i;
1113
1114         if (split == NULL)
1115                 return;
1116
1117         for (i=0; i<=TC_PRIO_MAX; i++) {
1118                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1119                         split->defaults[i] = NULL;
1120         }
1121
1122         for (i=0; i<=TC_PRIO_MAX; i++) {
1123                 int level = split->level;
1124
1125                 if (split->defaults[i])
1126                         continue;
1127
1128                 for (h=0; h<16; h++) {
1129                         struct cbq_class *c;
1130
1131                         for (c = q->classes[h]; c; c = c->next) {
1132                                 if (c->split == split && c->level < level &&
1133                                     c->defmap&(1<<i)) {
1134                                         split->defaults[i] = c;
1135                                         level = c->level;
1136                                 }
1137                         }
1138                 }
1139         }
1140 }
1141
1142 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1143 {
1144         struct cbq_class *split = NULL;
1145
1146         if (splitid == 0) {
1147                 if ((split = cl->split) == NULL)
1148                         return;
1149                 splitid = split->classid;
1150         }
1151
1152         if (split == NULL || split->classid != splitid) {
1153                 for (split = cl->tparent; split; split = split->tparent)
1154                         if (split->classid == splitid)
1155                                 break;
1156         }
1157
1158         if (split == NULL)
1159                 return;
1160
1161         if (cl->split != split) {
1162                 cl->defmap = 0;
1163                 cbq_sync_defmap(cl);
1164                 cl->split = split;
1165                 cl->defmap = def&mask;
1166         } else
1167                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1168
1169         cbq_sync_defmap(cl);
1170 }
1171
1172 static void cbq_unlink_class(struct cbq_class *this)
1173 {
1174         struct cbq_class *cl, **clp;
1175         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1176
1177         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1178                 if (cl == this) {
1179                         *clp = cl->next;
1180                         cl->next = NULL;
1181                         break;
1182                 }
1183         }
1184
1185         if (this->tparent) {
1186                 clp=&this->sibling;
1187                 cl = *clp;
1188                 do {
1189                         if (cl == this) {
1190                                 *clp = cl->sibling;
1191                                 break;
1192                         }
1193                         clp = &cl->sibling;
1194                 } while ((cl = *clp) != this->sibling);
1195
1196                 if (this->tparent->children == this) {
1197                         this->tparent->children = this->sibling;
1198                         if (this->sibling == this)
1199                                 this->tparent->children = NULL;
1200                 }
1201         } else {
1202                 BUG_TRAP(this->sibling == this);
1203         }
1204 }
1205
1206 static void cbq_link_class(struct cbq_class *this)
1207 {
1208         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1209         unsigned h = cbq_hash(this->classid);
1210         struct cbq_class *parent = this->tparent;
1211
1212         this->sibling = this;
1213         this->next = q->classes[h];
1214         q->classes[h] = this;
1215
1216         if (parent == NULL)
1217                 return;
1218
1219         if (parent->children == NULL) {
1220                 parent->children = this;
1221         } else {
1222                 this->sibling = parent->children->sibling;
1223                 parent->children->sibling = this;
1224         }
1225 }
1226
1227 static unsigned int cbq_drop(struct Qdisc* sch)
1228 {
1229         struct cbq_sched_data *q = qdisc_priv(sch);
1230         struct cbq_class *cl, *cl_head;
1231         int prio;
1232         unsigned int len;
1233
1234         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1235                 if ((cl_head = q->active[prio]) == NULL)
1236                         continue;
1237
1238                 cl = cl_head;
1239                 do {
1240                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1241                                 sch->q.qlen--;
1242                                 if (!cl->q->q.qlen)
1243                                         cbq_deactivate_class(cl);
1244                                 return len;
1245                         }
1246                 } while ((cl = cl->next_alive) != cl_head);
1247         }
1248         return 0;
1249 }
1250
1251 static void
1252 cbq_reset(struct Qdisc* sch)
1253 {
1254         struct cbq_sched_data *q = qdisc_priv(sch);
1255         struct cbq_class *cl;
1256         int prio;
1257         unsigned h;
1258
1259         q->activemask = 0;
1260         q->pmask = 0;
1261         q->tx_class = NULL;
1262         q->tx_borrowed = NULL;
1263         qdisc_watchdog_cancel(&q->watchdog);
1264         hrtimer_cancel(&q->delay_timer);
1265         q->toplevel = TC_CBQ_MAXLEVEL;
1266         q->now = psched_get_time();
1267         q->now_rt = q->now;
1268
1269         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1270                 q->active[prio] = NULL;
1271
1272         for (h = 0; h < 16; h++) {
1273                 for (cl = q->classes[h]; cl; cl = cl->next) {
1274                         qdisc_reset(cl->q);
1275
1276                         cl->next_alive = NULL;
1277                         cl->undertime = PSCHED_PASTPERFECT;
1278                         cl->avgidle = cl->maxidle;
1279                         cl->deficit = cl->quantum;
1280                         cl->cpriority = cl->priority;
1281                 }
1282         }
1283         sch->q.qlen = 0;
1284 }
1285
1286
1287 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1288 {
1289         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1290                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1291                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1292         }
1293         if (lss->change&TCF_CBQ_LSS_EWMA)
1294                 cl->ewma_log = lss->ewma_log;
1295         if (lss->change&TCF_CBQ_LSS_AVPKT)
1296                 cl->avpkt = lss->avpkt;
1297         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1298                 cl->minidle = -(long)lss->minidle;
1299         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1300                 cl->maxidle = lss->maxidle;
1301                 cl->avgidle = lss->maxidle;
1302         }
1303         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1304                 cl->offtime = lss->offtime;
1305         return 0;
1306 }
1307
1308 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1309 {
1310         q->nclasses[cl->priority]--;
1311         q->quanta[cl->priority] -= cl->weight;
1312         cbq_normalize_quanta(q, cl->priority);
1313 }
1314
1315 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1316 {
1317         q->nclasses[cl->priority]++;
1318         q->quanta[cl->priority] += cl->weight;
1319         cbq_normalize_quanta(q, cl->priority);
1320 }
1321
1322 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1323 {
1324         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1325
1326         if (wrr->allot)
1327                 cl->allot = wrr->allot;
1328         if (wrr->weight)
1329                 cl->weight = wrr->weight;
1330         if (wrr->priority) {
1331                 cl->priority = wrr->priority-1;
1332                 cl->cpriority = cl->priority;
1333                 if (cl->priority >= cl->priority2)
1334                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1335         }
1336
1337         cbq_addprio(q, cl);
1338         return 0;
1339 }
1340
1341 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1342 {
1343         switch (ovl->strategy) {
1344         case TC_CBQ_OVL_CLASSIC:
1345                 cl->overlimit = cbq_ovl_classic;
1346                 break;
1347         case TC_CBQ_OVL_DELAY:
1348                 cl->overlimit = cbq_ovl_delay;
1349                 break;
1350         case TC_CBQ_OVL_LOWPRIO:
1351                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1352                     ovl->priority2-1 <= cl->priority)
1353                         return -EINVAL;
1354                 cl->priority2 = ovl->priority2-1;
1355                 cl->overlimit = cbq_ovl_lowprio;
1356                 break;
1357         case TC_CBQ_OVL_DROP:
1358                 cl->overlimit = cbq_ovl_drop;
1359                 break;
1360         case TC_CBQ_OVL_RCLASSIC:
1361                 cl->overlimit = cbq_ovl_rclassic;
1362                 break;
1363         default:
1364                 return -EINVAL;
1365         }
1366         cl->penalty = ovl->penalty;
1367         return 0;
1368 }
1369
1370 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1371 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1372 {
1373         cl->police = p->police;
1374
1375         if (cl->q->handle) {
1376                 if (p->police == TC_POLICE_RECLASSIFY)
1377                         cl->q->reshape_fail = cbq_reshape_fail;
1378                 else
1379                         cl->q->reshape_fail = NULL;
1380         }
1381         return 0;
1382 }
1383 #endif
1384
1385 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1386 {
1387         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1388         return 0;
1389 }
1390
1391 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1392 {
1393         struct cbq_sched_data *q = qdisc_priv(sch);
1394         struct rtattr *tb[TCA_CBQ_MAX];
1395         struct tc_ratespec *r;
1396
1397         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1398             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1399             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1400                 return -EINVAL;
1401
1402         if (tb[TCA_CBQ_LSSOPT-1] &&
1403             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1404                 return -EINVAL;
1405
1406         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1407
1408         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1409                 return -EINVAL;
1410
1411         q->link.refcnt = 1;
1412         q->link.sibling = &q->link;
1413         q->link.classid = sch->handle;
1414         q->link.qdisc = sch;
1415         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1416                                             sch->handle)))
1417                 q->link.q = &noop_qdisc;
1418
1419         q->link.priority = TC_CBQ_MAXPRIO-1;
1420         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1421         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1422         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1423         q->link.overlimit = cbq_ovl_classic;
1424         q->link.allot = psched_mtu(sch->dev);
1425         q->link.quantum = q->link.allot;
1426         q->link.weight = q->link.R_tab->rate.rate;
1427
1428         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1429         q->link.avpkt = q->link.allot/2;
1430         q->link.minidle = -0x7FFFFFFF;
1431
1432         qdisc_watchdog_init(&q->watchdog, sch);
1433         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1434         q->delay_timer.function = cbq_undelay;
1435         q->toplevel = TC_CBQ_MAXLEVEL;
1436         q->now = psched_get_time();
1437         q->now_rt = q->now;
1438
1439         cbq_link_class(&q->link);
1440
1441         if (tb[TCA_CBQ_LSSOPT-1])
1442                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1443
1444         cbq_addprio(q, &q->link);
1445         return 0;
1446 }
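
/* For reference, a root CBQ qdisc of this era was typically created with
   something like (illustrative, not guaranteed verbatim):

       tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000 cell 8

   which supplies the TCA_CBQ_RATE/TCA_CBQ_RTAB and LSS attributes parsed
   by cbq_init() above. */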
1447
1448 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1449 {
1450         unsigned char *b = skb_tail_pointer(skb);
1451
1452         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453         return skb->len;
1454
1455 rtattr_failure:
1456         nlmsg_trim(skb, b);
1457         return -1;
1458 }
1459
1460 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1461 {
1462         unsigned char *b = skb_tail_pointer(skb);
1463         struct tc_cbq_lssopt opt;
1464
1465         opt.flags = 0;
1466         if (cl->borrow == NULL)
1467                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468         if (cl->share == NULL)
1469                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470         opt.ewma_log = cl->ewma_log;
1471         opt.level = cl->level;
1472         opt.avpkt = cl->avpkt;
1473         opt.maxidle = cl->maxidle;
1474         opt.minidle = (u32)(-cl->minidle);
1475         opt.offtime = cl->offtime;
1476         opt.change = ~0;
1477         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478         return skb->len;
1479
1480 rtattr_failure:
1481         nlmsg_trim(skb, b);
1482         return -1;
1483 }
1484
1485 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1486 {
1487         unsigned char *b = skb_tail_pointer(skb);
1488         struct tc_cbq_wrropt opt;
1489
1490         opt.flags = 0;
1491         opt.allot = cl->allot;
1492         opt.priority = cl->priority+1;
1493         opt.cpriority = cl->cpriority+1;
1494         opt.weight = cl->weight;
1495         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496         return skb->len;
1497
1498 rtattr_failure:
1499         nlmsg_trim(skb, b);
1500         return -1;
1501 }
1502
1503 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1504 {
1505         unsigned char *b = skb_tail_pointer(skb);
1506         struct tc_cbq_ovl opt;
1507
1508         opt.strategy = cl->ovl_strategy;
1509         opt.priority2 = cl->priority2+1;
1510         opt.pad = 0;
1511         opt.penalty = cl->penalty;
1512         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513         return skb->len;
1514
1515 rtattr_failure:
1516         nlmsg_trim(skb, b);
1517         return -1;
1518 }
1519
1520 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1521 {
1522         unsigned char *b = skb_tail_pointer(skb);
1523         struct tc_cbq_fopt opt;
1524
1525         if (cl->split || cl->defmap) {
1526                 opt.split = cl->split ? cl->split->classid : 0;
1527                 opt.defmap = cl->defmap;
1528                 opt.defchange = ~0;
1529                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1530         }
1531         return skb->len;
1532
1533 rtattr_failure:
1534         nlmsg_trim(skb, b);
1535         return -1;
1536 }
1537
1538 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1539 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1540 {
1541         unsigned char *b = skb_tail_pointer(skb);
1542         struct tc_cbq_police opt;
1543
1544         if (cl->police) {
1545                 opt.police = cl->police;
1546                 opt.__res1 = 0;
1547                 opt.__res2 = 0;
1548                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1549         }
1550         return skb->len;
1551
1552 rtattr_failure:
1553         nlmsg_trim(skb, b);
1554         return -1;
1555 }
1556 #endif
1557
1558 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1559 {
1560         if (cbq_dump_lss(skb, cl) < 0 ||
1561             cbq_dump_rate(skb, cl) < 0 ||
1562             cbq_dump_wrr(skb, cl) < 0 ||
1563             cbq_dump_ovl(skb, cl) < 0 ||
1564 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1565             cbq_dump_police(skb, cl) < 0 ||
1566 #endif
1567             cbq_dump_fopt(skb, cl) < 0)
1568                 return -1;
1569         return 0;
1570 }
1571
1572 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1573 {
1574         struct cbq_sched_data *q = qdisc_priv(sch);
1575         unsigned char *b = skb_tail_pointer(skb);
1576         struct rtattr *rta;
1577
1578         rta = (struct rtattr*)b;
1579         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1580         if (cbq_dump_attr(skb, &q->link) < 0)
1581                 goto rtattr_failure;
1582         rta->rta_len = skb_tail_pointer(skb) - b;
1583         return skb->len;
1584
1585 rtattr_failure:
1586         nlmsg_trim(skb, b);
1587         return -1;
1588 }
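
/* Nesting idiom used by cbq_dump() and cbq_dump_class(): a zero-length
 * TCA_OPTIONS attribute is emitted first, the individual cbq attributes are
 * appended after it, and rta->rta_len is then patched to cover everything
 * added since the saved tail pointer.  Later kernels express the same idea
 * with nla_nest_start()/nla_nest_end().
 */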
1589
1590 static int
1591 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1592 {
1593         struct cbq_sched_data *q = qdisc_priv(sch);
1594
1595         q->link.xstats.avgidle = q->link.avgidle;
1596         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1597 }
1598
1599 static int
1600 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601                struct sk_buff *skb, struct tcmsg *tcm)
1602 {
1603         struct cbq_class *cl = (struct cbq_class*)arg;
1604         unsigned char *b = skb_tail_pointer(skb);
1605         struct rtattr *rta;
1606
1607         if (cl->tparent)
1608                 tcm->tcm_parent = cl->tparent->classid;
1609         else
1610                 tcm->tcm_parent = TC_H_ROOT;
1611         tcm->tcm_handle = cl->classid;
1612         tcm->tcm_info = cl->q->handle;
1613
1614         rta = (struct rtattr*)b;
1615         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1616         if (cbq_dump_attr(skb, cl) < 0)
1617                 goto rtattr_failure;
1618         rta->rta_len = skb_tail_pointer(skb) - b;
1619         return skb->len;
1620
1621 rtattr_failure:
1622         nlmsg_trim(skb, b);
1623         return -1;
1624 }
1625
1626 static int
1627 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628         struct gnet_dump *d)
1629 {
1630         struct cbq_sched_data *q = qdisc_priv(sch);
1631         struct cbq_class *cl = (struct cbq_class*)arg;
1632
1633         cl->qstats.qlen = cl->q->q.qlen;
1634         cl->xstats.avgidle = cl->avgidle;
1635         cl->xstats.undertime = 0;
1636
1637         if (cl->undertime != PSCHED_PASTPERFECT)
1638                 cl->xstats.undertime = cl->undertime - q->now;
1639
1640         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1641             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1643                 return -1;
1644
1645         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1646 }
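
/* Class statistics are exported in three parts: the generic byte/packet
 * counters and rate estimate, the queue stats (with qlen refreshed from the
 * child qdisc just above), and the cbq-specific xstats blob carrying avgidle
 * and how far in the future the class becomes eligible again (undertime,
 * relative to q->now).
 */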
1647
1648 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1649                      struct Qdisc **old)
1650 {
1651         struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653         if (cl) {
1654                 if (new == NULL) {
1655                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1656                                                      cl->classid)) == NULL)
1657                                 return -ENOBUFS;
1658                 } else {
1659 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1660                         if (cl->police == TC_POLICE_RECLASSIFY)
1661                                 new->reshape_fail = cbq_reshape_fail;
1662 #endif
1663                 }
1664                 sch_tree_lock(sch);
1665                 *old = xchg(&cl->q, new);
1666                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1667                 qdisc_reset(*old);
1668                 sch_tree_unlock(sch);
1669
1670                 return 0;
1671         }
1672         return -ENOENT;
1673 }
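
/* Grafting a child qdisc: if none is supplied, a default pfifo is created;
 * otherwise, when the class polices with TC_POLICE_RECLASSIFY, the new
 * child's ->reshape_fail hook is pointed at cbq_reshape_fail so packets the
 * child cannot take can be handed back to cbq for reclassification.  The
 * swap itself happens under sch_tree_lock(): xchg() the qdisc pointer,
 * propagate the removed backlog with qdisc_tree_decrease_qlen(), and reset
 * the old qdisc.
 */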
1674
1675 static struct Qdisc *
1676 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1677 {
1678         struct cbq_class *cl = (struct cbq_class*)arg;
1679
1680         return cl ? cl->q : NULL;
1681 }
1682
1683 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1684 {
1685         struct cbq_class *cl = (struct cbq_class *)arg;
1686
1687         if (cl->q->q.qlen == 0)
1688                 cbq_deactivate_class(cl);
1689 }
1690
1691 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1692 {
1693         struct cbq_sched_data *q = qdisc_priv(sch);
1694         struct cbq_class *cl = cbq_class_lookup(q, classid);
1695
1696         if (cl) {
1697                 cl->refcnt++;
1698                 return (unsigned long)cl;
1699         }
1700         return 0;
1701 }
1702
1703 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1704 {
1705         struct cbq_sched_data *q = qdisc_priv(sch);
1706
1707         BUG_TRAP(!cl->filters);
1708
1709         tcf_destroy_chain(cl->filter_list);
1710         qdisc_destroy(cl->q);
1711         qdisc_put_rtab(cl->R_tab);
1712         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1713         if (cl != &q->link)
1714                 kfree(cl);
1715 }
1716
1717 static void
1718 cbq_destroy(struct Qdisc* sch)
1719 {
1720         struct cbq_sched_data *q = qdisc_priv(sch);
1721         struct cbq_class *cl;
1722         unsigned h;
1723
1724 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1725         q->rx_class = NULL;
1726 #endif
1727         /*
1728          * Filters must be destroyed first: classes are not destroyed from
1729          * root to leaves, so a filter could otherwise still be bound to a
1730          * class that has already been destroyed. --TGR '04
1731          */
1732         for (h = 0; h < 16; h++) {
1733                 for (cl = q->classes[h]; cl; cl = cl->next) {
1734                         tcf_destroy_chain(cl->filter_list);
1735                         cl->filter_list = NULL;
1736                 }
1737         }
1738         for (h = 0; h < 16; h++) {
1739                 struct cbq_class *next;
1740
1741                 for (cl = q->classes[h]; cl; cl = next) {
1742                         next = cl->next;
1743                         cbq_destroy_class(sch, cl);
1744                 }
1745         }
1746 }
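
/* Destruction runs in two passes over the 16-bucket class hash: first every
 * filter chain is torn down (see the ordering note above), then the classes
 * themselves are released through cbq_destroy_class(), which skips kfree()
 * for the root link class embedded in cbq_sched_data.
 */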
1747
1748 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1749 {
1750         struct cbq_class *cl = (struct cbq_class*)arg;
1751
1752         if (--cl->refcnt == 0) {
1753 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1754                 struct cbq_sched_data *q = qdisc_priv(sch);
1755
1756                 spin_lock_bh(&sch->dev->queue_lock);
1757                 if (q->rx_class == cl)
1758                         q->rx_class = NULL;
1759                 spin_unlock_bh(&sch->dev->queue_lock);
1760 #endif
1761
1762                 cbq_destroy_class(sch, cl);
1763         }
1764 }
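
/* Class reference counting: cbq_get() takes a reference on lookup, cbq_put()
 * drops it.  On the final put the class may still be cached as q->rx_class by
 * the enqueue path, so that pointer is cleared under the device queue_lock
 * before the class is destroyed.
 */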
1765
1766 static int
1767 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1768                  unsigned long *arg)
1769 {
1770         int err;
1771         struct cbq_sched_data *q = qdisc_priv(sch);
1772         struct cbq_class *cl = (struct cbq_class*)*arg;
1773         struct rtattr *opt = tca[TCA_OPTIONS-1];
1774         struct rtattr *tb[TCA_CBQ_MAX];
1775         struct cbq_class *parent;
1776         struct qdisc_rate_table *rtab = NULL;
1777
1778         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1779                 return -EINVAL;
1780
1781         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1782             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1783                 return -EINVAL;
1784
1785         if (tb[TCA_CBQ_FOPT-1] &&
1786             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1787                 return -EINVAL;
1788
1789         if (tb[TCA_CBQ_RATE-1] &&
1790             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1791                 return -EINVAL;
1792
1793         if (tb[TCA_CBQ_LSSOPT-1] &&
1794             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1795                 return -EINVAL;
1796
1797         if (tb[TCA_CBQ_WRROPT-1] &&
1798             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1799                 return -EINVAL;
1800
1801 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1802         if (tb[TCA_CBQ_POLICE-1] &&
1803             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1804                 return -EINVAL;
1805 #endif
1806
1807         if (cl) {
1808                 /* Check parent */
1809                 if (parentid) {
1810                         if (cl->tparent && cl->tparent->classid != parentid)
1811                                 return -EINVAL;
1812                         if (!cl->tparent && parentid != TC_H_ROOT)
1813                                 return -EINVAL;
1814                 }
1815
1816                 if (tb[TCA_CBQ_RATE-1]) {
1817                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1818                         if (rtab == NULL)
1819                                 return -EINVAL;
1820                 }
1821
1822                 /* Change class parameters */
1823                 sch_tree_lock(sch);
1824
1825                 if (cl->next_alive != NULL)
1826                         cbq_deactivate_class(cl);
1827
1828                 if (rtab) {
1829                         rtab = xchg(&cl->R_tab, rtab);
1830                         qdisc_put_rtab(rtab);
1831                 }
1832
1833                 if (tb[TCA_CBQ_LSSOPT-1])
1834                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1835
1836                 if (tb[TCA_CBQ_WRROPT-1]) {
1837                         cbq_rmprio(q, cl);
1838                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1839                 }
1840
1841                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1842                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1843
1844 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1845                 if (tb[TCA_CBQ_POLICE-1])
1846                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1847 #endif
1848
1849                 if (tb[TCA_CBQ_FOPT-1])
1850                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1851
1852                 if (cl->q->q.qlen)
1853                         cbq_activate_class(cl);
1854
1855                 sch_tree_unlock(sch);
1856
1857                 if (tca[TCA_RATE-1])
1858                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1859                                               &sch->dev->queue_lock,
1860                                               tca[TCA_RATE-1]);
1861                 return 0;
1862         }
1863
1864         if (parentid == TC_H_ROOT)
1865                 return -EINVAL;
1866
1867         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1868             tb[TCA_CBQ_LSSOPT-1] == NULL)
1869                 return -EINVAL;
1870
1871         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1872         if (rtab == NULL)
1873                 return -EINVAL;
1874
1875         if (classid) {
1876                 err = -EINVAL;
1877                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1878                         goto failure;
1879         } else {
1880                 int i;
1881                 classid = TC_H_MAKE(sch->handle,0x8000);
1882
1883                 for (i=0; i<0x8000; i++) {
1884                         if (++q->hgenerator >= 0x8000)
1885                                 q->hgenerator = 1;
1886                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1887                                 break;
1888                 }
1889                 err = -ENOSR;
1890                 if (i >= 0x8000)
1891                         goto failure;
1892                 classid = classid|q->hgenerator;
1893         }
1894
1895         parent = &q->link;
1896         if (parentid) {
1897                 parent = cbq_class_lookup(q, parentid);
1898                 err = -EINVAL;
1899                 if (parent == NULL)
1900                         goto failure;
1901         }
1902
1903         err = -ENOBUFS;
1904         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1905         if (cl == NULL)
1906                 goto failure;
1907         cl->R_tab = rtab;
1908         rtab = NULL;
1909         cl->refcnt = 1;
1910         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1911                 cl->q = &noop_qdisc;
1912         cl->classid = classid;
1913         cl->tparent = parent;
1914         cl->qdisc = sch;
1915         cl->allot = parent->allot;
1916         cl->quantum = cl->allot;
1917         cl->weight = cl->R_tab->rate.rate;
1918
1919         sch_tree_lock(sch);
1920         cbq_link_class(cl);
1921         cl->borrow = cl->tparent;
1922         if (cl->tparent != &q->link)
1923                 cl->share = cl->tparent;
1924         cbq_adjust_levels(parent);
1925         cl->minidle = -0x7FFFFFFF;
1926         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1927         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1928         if (cl->ewma_log==0)
1929                 cl->ewma_log = q->link.ewma_log;
1930         if (cl->maxidle==0)
1931                 cl->maxidle = q->link.maxidle;
1932         if (cl->avpkt==0)
1933                 cl->avpkt = q->link.avpkt;
1934         cl->overlimit = cbq_ovl_classic;
1935         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1936                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1937 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1938         if (tb[TCA_CBQ_POLICE-1])
1939                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1940 #endif
1941         if (tb[TCA_CBQ_FOPT-1])
1942                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1943         sch_tree_unlock(sch);
1944
1945         if (tca[TCA_RATE-1])
1946                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1947                                   &sch->dev->queue_lock, tca[TCA_RATE-1]);
1948
1949         *arg = (unsigned long)cl;
1950         return 0;
1951
1952 failure:
1953         qdisc_put_rtab(rtab);
1954         return err;
1955 }
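
/* cbq_change_class() above handles both "change" (cl != NULL: swap in a new
 * rate table and re-apply whichever LSS/WRR/overlimit/police/fopt attributes
 * were supplied, all under sch_tree_lock()) and "add" (RATE, WRROPT and
 * LSSOPT are mandatory, a free minor id in the 0x8000-0xffff range is
 * generated when none was given, and unset ewma_log, maxidle and avpkt are
 * inherited from the root class).  For illustration only (assumed iproute2
 * syntax, not part of this file), a leaf class is typically added with:
 *
 *     tc class add dev eth0 parent 1: classid 1:10 cbq bandwidth 100mbit \
 *             rate 10mbit allot 1514 prio 5 avpkt 1000 bounded
 */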
1956
1957 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1958 {
1959         struct cbq_sched_data *q = qdisc_priv(sch);
1960         struct cbq_class *cl = (struct cbq_class*)arg;
1961         unsigned int qlen;
1962
1963         if (cl->filters || cl->children || cl == &q->link)
1964                 return -EBUSY;
1965
1966         sch_tree_lock(sch);
1967
1968         qlen = cl->q->q.qlen;
1969         qdisc_reset(cl->q);
1970         qdisc_tree_decrease_qlen(cl->q, qlen);
1971
1972         if (cl->next_alive)
1973                 cbq_deactivate_class(cl);
1974
1975         if (q->tx_borrowed == cl)
1976                 q->tx_borrowed = q->tx_class;
1977         if (q->tx_class == cl) {
1978                 q->tx_class = NULL;
1979                 q->tx_borrowed = NULL;
1980         }
1981 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1982         if (q->rx_class == cl)
1983                 q->rx_class = NULL;
1984 #endif
1985
1986         cbq_unlink_class(cl);
1987         cbq_adjust_levels(cl->tparent);
1988         cl->defmap = 0;
1989         cbq_sync_defmap(cl);
1990
1991         cbq_rmprio(q, cl);
1992         sch_tree_unlock(sch);
1993
1994         if (--cl->refcnt == 0)
1995                 cbq_destroy_class(sch, cl);
1996
1997         return 0;
1998 }
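
/* Deleting a class is refused while filters are still bound to it, while it
 * still has children, or for the root link class.  Otherwise its queue is
 * purged (propagating the backlog change upwards), the class is removed from
 * the active list and the tx/rx shortcuts, unlinked from the hash and
 * priority tables, its default map is cleared, and it is destroyed once the
 * last reference obtained via cbq_get() is dropped.
 */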
1999
2000 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2001 {
2002         struct cbq_sched_data *q = qdisc_priv(sch);
2003         struct cbq_class *cl = (struct cbq_class *)arg;
2004
2005         if (cl == NULL)
2006                 cl = &q->link;
2007
2008         return &cl->filter_list;
2009 }
2010
2011 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2012                                      u32 classid)
2013 {
2014         struct cbq_sched_data *q = qdisc_priv(sch);
2015         struct cbq_class *p = (struct cbq_class*)parent;
2016         struct cbq_class *cl = cbq_class_lookup(q, classid);
2017
2018         if (cl) {
2019                 if (p && p->level <= cl->level)
2020                         return 0;
2021                 cl->filters++;
2022                 return (unsigned long)cl;
2023         }
2024         return 0;
2025 }
2026
2027 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2028 {
2029         struct cbq_class *cl = (struct cbq_class*)arg;
2030
2031         cl->filters--;
2032 }
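
/* Filter binding: a classifier attached to class p may only be bound to a
 * class strictly below p's level, and cl->filters counts active bindings so
 * that cbq_delete() can refuse to remove a class a filter still points at.
 */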
2033
2034 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2035 {
2036         struct cbq_sched_data *q = qdisc_priv(sch);
2037         unsigned h;
2038
2039         if (arg->stop)
2040                 return;
2041
2042         for (h = 0; h < 16; h++) {
2043                 struct cbq_class *cl;
2044
2045                 for (cl = q->classes[h]; cl; cl = cl->next) {
2046                         if (arg->count < arg->skip) {
2047                                 arg->count++;
2048                                 continue;
2049                         }
2050                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2051                                 arg->stop = 1;
2052                                 return;
2053                         }
2054                         arg->count++;
2055                 }
2056         }
2057 }
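
/* cbq_walk() implements the usual qdisc_walker protocol: classes before
 * arg->skip are only counted, every remaining class is passed to arg->fn(),
 * and a negative return from the callback sets arg->stop and ends the walk.
 */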
2058
2059 static struct Qdisc_class_ops cbq_class_ops = {
2060         .graft          =       cbq_graft,
2061         .leaf           =       cbq_leaf,
2062         .qlen_notify    =       cbq_qlen_notify,
2063         .get            =       cbq_get,
2064         .put            =       cbq_put,
2065         .change         =       cbq_change_class,
2066         .delete         =       cbq_delete,
2067         .walk           =       cbq_walk,
2068         .tcf_chain      =       cbq_find_tcf,
2069         .bind_tcf       =       cbq_bind_filter,
2070         .unbind_tcf     =       cbq_unbind_filter,
2071         .dump           =       cbq_dump_class,
2072         .dump_stats     =       cbq_dump_class_stats,
2073 };
2074
2075 static struct Qdisc_ops cbq_qdisc_ops = {
2076         .next           =       NULL,
2077         .cl_ops         =       &cbq_class_ops,
2078         .id             =       "cbq",
2079         .priv_size      =       sizeof(struct cbq_sched_data),
2080         .enqueue        =       cbq_enqueue,
2081         .dequeue        =       cbq_dequeue,
2082         .requeue        =       cbq_requeue,
2083         .drop           =       cbq_drop,
2084         .init           =       cbq_init,
2085         .reset          =       cbq_reset,
2086         .destroy        =       cbq_destroy,
2087         .change         =       NULL,
2088         .dump           =       cbq_dump,
2089         .dump_stats     =       cbq_dump_stats,
2090         .owner          =       THIS_MODULE,
2091 };
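
/* Module glue: register_qdisc() makes the "cbq" qdisc id available to the tc
 * netlink interface at module load, and unregister_qdisc() removes it again
 * on unload.
 */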
2092
2093 static int __init cbq_module_init(void)
2094 {
2095         return register_qdisc(&cbq_qdisc_ops);
2096 }
2097 static void __exit cbq_module_exit(void)
2098 {
2099         unregister_qdisc(&cbq_qdisc_ops);
2100 }
2101 module_init(cbq_module_init)
2102 module_exit(cbq_module_exit)
2103 MODULE_LICENSE("GPL");