[NET_SCHED]: kill PSCHED_TDIFF
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/netlink.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         The algorithm skeleton was taken from the NS simulator's cbq.cc.
57         If someone wants to check this code against the LBL version,
58         they should take into account that ONLY the skeleton was borrowed;
59         the implementation is different. In particular:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than the MTU, which is always the case when real-time classes
64         have small rates. Note that the statement of [3] is
65         incomplete: delay may actually be estimated even if the class
66         per-round allotment is less than the MTU. Namely, if the per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like wrong translations
77         from NS. Anyone is advised to find these differences
78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so we cannot estimate the true class
81         idle time. The workaround is to treat the next dequeue event
82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it is true.
84         Moreover, combined with the clock integrator, this scheme looks
85         very close to an ideal solution.  */
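/* A purely illustrative reading of the delay bound above (assuming W is the
   per-round byte allotment scale and B the link rate in bytes/sec): with
   MTU = 1500 B, B = 1.25e6 B/s (10 Mbit/s), W = 5000 B, and k = 2 classes
   with r_1 = 0.1 and r_2 = 0.3 (so r = 0.4), class 1 gets W*r_1 = 500 B per
   round, needs [1500/500] = 3 rounds per MTU, and

        delay_1 <= (3*5000*0.4 + 5000*0.4 + 2*1500)/1.25e6 ~= 8.8 ms.  */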
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         psched_tdiff_t          penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         psched_time_t           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct hrtimer          delay_timer;
185         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         psched_tdiff_t          wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
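/* L2T ("length to time"): look up the transmission time of a len-byte packet
   in the class's rate table, indexed by len >> cell_log. */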
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
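/* Fold the low bits of the class id into a 4-bit index for the 16-bucket
   q->classes[] hash table. */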
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
237    so that they resolve to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
240  */
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If classifier selected a link sharing class,
303                  *         apply agency specific classifier.
304                  *         Repeat this procdure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued into an empty class.
325    cbq_activate_class adds it to the tail of the active class list
326    of its priority band.
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
346
347 /*
348    Unlink class from active chain.
349    Note that this same procedure is done directly in cbq_dequeue*
350    during round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374                         return;
375                 }
376         } while ((cl_prev = cl) != q->active[prio]);
377 }
378
379 static void
380 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 {
382         int toplevel = q->toplevel;
383
384         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385                 psched_time_t now;
386                 psched_tdiff_t incr;
387
388                 PSCHED_GET_TIME(now);
389                 incr = now - q->now_rt;
390                 now = q->now + incr;
391
392                 do {
393                         if (cl->undertime < now) {
394                                 q->toplevel = cl->level;
395                                 return;
396                         }
397                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398         }
399 }
400
401 static int
402 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 {
404         struct cbq_sched_data *q = qdisc_priv(sch);
405         int len = skb->len;
406         int ret;
407         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409 #ifdef CONFIG_NET_CLS_POLICE
410         q->rx_class = cl;
411 #endif
412         if (cl == NULL) {
413                 if (ret == NET_XMIT_BYPASS)
414                         sch->qstats.drops++;
415                 kfree_skb(skb);
416                 return ret;
417         }
418
419 #ifdef CONFIG_NET_CLS_POLICE
420         cl->q->__parent = sch;
421 #endif
422         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423                 sch->q.qlen++;
424                 sch->bstats.packets++;
425                 sch->bstats.bytes+=len;
426                 cbq_mark_toplevel(q, cl);
427                 if (!cl->next_alive)
428                         cbq_activate_class(cl);
429                 return ret;
430         }
431
432         sch->qstats.drops++;
433         cbq_mark_toplevel(q, cl);
434         cl->qstats.drops++;
435         return ret;
436 }
437
438 static int
439 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 {
441         struct cbq_sched_data *q = qdisc_priv(sch);
442         struct cbq_class *cl;
443         int ret;
444
445         if ((cl = q->tx_class) == NULL) {
446                 kfree_skb(skb);
447                 sch->qstats.drops++;
448                 return NET_XMIT_CN;
449         }
450         q->tx_class = NULL;
451
452         cbq_mark_toplevel(q, cl);
453
454 #ifdef CONFIG_NET_CLS_POLICE
455         q->rx_class = cl;
456         cl->q->__parent = sch;
457 #endif
458         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459                 sch->q.qlen++;
460                 sch->qstats.requeues++;
461                 if (!cl->next_alive)
462                         cbq_activate_class(cl);
463                 return 0;
464         }
465         sch->qstats.drops++;
466         cl->qstats.drops++;
467         return ret;
468 }
469
470 /* Overlimit actions */
471
472 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474 static void cbq_ovl_classic(struct cbq_class *cl)
475 {
476         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477         psched_tdiff_t delay = cl->undertime - q->now;
478
479         if (!cl->delayed) {
480                 delay += cl->offtime;
481
482                 /*
483                    The class goes to sleep, so it will have no
484                    chance to work off its avgidle. Let's forgive it 8)
485
486                    BTW cbq-2.0 has a bug in this
487                    place: apparently they forgot to shift it by cl->ewma_log.
488                  */
489                 if (cl->avgidle < 0)
490                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491                 if (cl->avgidle < cl->minidle)
492                         cl->avgidle = cl->minidle;
493                 if (delay <= 0)
494                         delay = 1;
495                 cl->undertime = q->now + delay;
496
497                 cl->xstats.overactions++;
498                 cl->delayed = 1;
499         }
500         if (q->wd_expires == 0 || q->wd_expires > delay)
501                 q->wd_expires = delay;
502
503         /* Dirty work! We must schedule wakeups based on
504            real available rate, rather than leaf rate,
505            which may be tiny (even zero).
506          */
507         if (q->toplevel == TC_CBQ_MAXLEVEL) {
508                 struct cbq_class *b;
509                 psched_tdiff_t base_delay = q->wd_expires;
510
511                 for (b = cl->borrow; b; b = b->borrow) {
512                         delay = b->undertime - q->now;
513                         if (delay < base_delay) {
514                                 if (delay <= 0)
515                                         delay = 1;
516                                 base_delay = delay;
517                         }
518                 }
519
520                 q->wd_expires = base_delay;
521         }
522 }
523
524 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
525    they go overlimit
526  */
527
528 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 {
530         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531         struct cbq_class *this = cl;
532
533         do {
534                 if (cl->level > q->toplevel) {
535                         cl = NULL;
536                         break;
537                 }
538         } while ((cl = cl->borrow) != NULL);
539
540         if (cl == NULL)
541                 cl = this;
542         cbq_ovl_classic(cl);
543 }
544
545 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
546
547 static void cbq_ovl_delay(struct cbq_class *cl)
548 {
549         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550         psched_tdiff_t delay = cl->undertime - q->now;
551
552         if (!cl->delayed) {
553                 psched_time_t sched = q->now;
554                 ktime_t expires;
555
556                 delay += cl->offtime;
557                 if (cl->avgidle < 0)
558                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559                 if (cl->avgidle < cl->minidle)
560                         cl->avgidle = cl->minidle;
561                 cl->undertime = q->now + delay;
562
563                 if (delay > 0) {
564                         sched += delay + cl->penalty;
565                         cl->penalized = sched;
566                         cl->cpriority = TC_CBQ_MAXPRIO;
567                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
568
569                         expires = ktime_set(0, 0);
570                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
572                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
573                                                   expires)) > 0)
574                                 q->delay_timer.expires = expires;
575                         hrtimer_restart(&q->delay_timer);
576                         cl->delayed = 1;
577                         cl->xstats.overactions++;
578                         return;
579                 }
580                 delay = 1;
581         }
582         if (q->wd_expires == 0 || q->wd_expires > delay)
583                 q->wd_expires = delay;
584 }
585
586 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588 static void cbq_ovl_lowprio(struct cbq_class *cl)
589 {
590         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
592         cl->penalized = q->now + cl->penalty;
593
594         if (cl->cpriority != cl->priority2) {
595                 cl->cpriority = cl->priority2;
596                 q->pmask |= (1<<cl->cpriority);
597                 cl->xstats.overactions++;
598         }
599         cbq_ovl_classic(cl);
600 }
601
602 /* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604 static void cbq_ovl_drop(struct cbq_class *cl)
605 {
606         if (cl->q->ops->drop)
607                 if (cl->q->ops->drop(cl->q))
608                         cl->qdisc->q.qlen--;
609         cl->xstats.overactions++;
610         cbq_ovl_classic(cl);
611 }
612
613 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614                                        psched_time_t now)
615 {
616         struct cbq_class *cl;
617         struct cbq_class *cl_prev = q->active[prio];
618         psched_time_t sched = now;
619
620         if (cl_prev == NULL)
621                 return 0;
622
623         do {
624                 cl = cl_prev->next_alive;
625                 if (now - cl->penalized > 0) {
626                         cl_prev->next_alive = cl->next_alive;
627                         cl->next_alive = NULL;
628                         cl->cpriority = cl->priority;
629                         cl->delayed = 0;
630                         cbq_activate_class(cl);
631
632                         if (cl == q->active[prio]) {
633                                 q->active[prio] = cl_prev;
634                                 if (cl == q->active[prio]) {
635                                         q->active[prio] = NULL;
636                                         return 0;
637                                 }
638                         }
639
640                         cl = cl_prev->next_alive;
641                 } else if (sched - cl->penalized > 0)
642                         sched = cl->penalized;
643         } while ((cl_prev = cl) != q->active[prio]);
644
645         return sched - now;
646 }
647
648 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649 {
650         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651                                                 delay_timer);
652         struct Qdisc *sch = q->watchdog.qdisc;
653         psched_time_t now;
654         psched_tdiff_t delay = 0;
655         unsigned pmask;
656
657         PSCHED_GET_TIME(now);
658
659         pmask = q->pmask;
660         q->pmask = 0;
661
662         while (pmask) {
663                 int prio = ffz(~pmask);
664                 psched_tdiff_t tmp;
665
666                 pmask &= ~(1<<prio);
667
668                 tmp = cbq_undelay_prio(q, prio, now);
669                 if (tmp > 0) {
670                         q->pmask |= 1<<prio;
671                         if (tmp < delay || delay == 0)
672                                 delay = tmp;
673                 }
674         }
675
676         if (delay) {
677                 ktime_t time;
678
679                 time = ktime_set(0, 0);
680                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
682         }
683
684         sch->flags &= ~TCQ_F_THROTTLED;
685         netif_schedule(sch->dev);
686         return HRTIMER_NORESTART;
687 }
688
689
690 #ifdef CONFIG_NET_CLS_POLICE
691
692 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693 {
694         int len = skb->len;
695         struct Qdisc *sch = child->__parent;
696         struct cbq_sched_data *q = qdisc_priv(sch);
697         struct cbq_class *cl = q->rx_class;
698
699         q->rx_class = NULL;
700
701         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703                 cbq_mark_toplevel(q, cl);
704
705                 q->rx_class = cl;
706                 cl->q->__parent = sch;
707
708                 if (cl->q->enqueue(skb, cl->q) == 0) {
709                         sch->q.qlen++;
710                         sch->bstats.packets++;
711                         sch->bstats.bytes+=len;
712                         if (!cl->next_alive)
713                                 cbq_activate_class(cl);
714                         return 0;
715                 }
716                 sch->qstats.drops++;
717                 return 0;
718         }
719
720         sch->qstats.drops++;
721         return -1;
722 }
723 #endif
724
725 /*
726    This is a mission-critical procedure.
727
728    We "regenerate" the toplevel cutoff if the transmitting class
729    has backlog and is not regulated. This is not part of the
730    original CBQ description, but looks more reasonable.
731    Probably it is wrong. This question needs further investigation.
732 */
733
734 static __inline__ void
735 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736                     struct cbq_class *borrowed)
737 {
738         if (cl && q->toplevel >= borrowed->level) {
739                 if (cl->q->q.qlen > 1) {
740                         do {
741                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
742                                         q->toplevel = borrowed->level;
743                                         return;
744                                 }
745                         } while ((borrowed=borrowed->borrow) != NULL);
746                 }
747 #if 0
748         /* It is not necessary now. Uncommenting it
749            will save CPU cycles, but decrease fairness.
750          */
751                 q->toplevel = TC_CBQ_MAXLEVEL;
752 #endif
753         }
754 }
755
756 static void
757 cbq_update(struct cbq_sched_data *q)
758 {
759         struct cbq_class *this = q->tx_class;
760         struct cbq_class *cl = this;
761         int len = q->tx_len;
762
763         q->tx_class = NULL;
764
765         for ( ; cl; cl = cl->share) {
766                 long avgidle = cl->avgidle;
767                 long idle;
768
769                 cl->bstats.packets++;
770                 cl->bstats.bytes += len;
771
772                 /*
773                    (now - last) is total time between packet right edges.
774                    (last_pktlen/rate) is "virtual" busy time, so that
775
776                          idle = (now - last) - last_pktlen/rate
777                  */
778
779                 idle = q->now - cl->last;
780                 if ((unsigned long)idle > 128*1024*1024) {
781                         avgidle = cl->maxidle;
782                 } else {
783                         idle -= L2T(cl, len);
784
785                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
787                    cl->avgidle == true_avgidle/W,
788                    hence:
789                  */
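                /* With W = 2^(-ewma_log) and cl->avgidle == true_avgidle/W,
                   dividing the update above by W gives
                        avgidle' = avgidle + idle - W*avgidle
                                 = avgidle + idle - (avgidle >> ewma_log),
                   which is the statement below.
                 */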
790                         avgidle += idle - (avgidle>>cl->ewma_log);
791                 }
792
793                 if (avgidle <= 0) {
794                         /* Overlimit or at-limit */
795
796                         if (avgidle < cl->minidle)
797                                 avgidle = cl->minidle;
798
799                         cl->avgidle = avgidle;
800
801                         /* Calculate the expected time when this class
802                            will be allowed to send.
803                            It will occur when:
804                            (1-W)*true_avgidle + W*delay = 0, i.e.
805                            idle = (1/W - 1)*(-true_avgidle)
806                            or
807                            idle = (1 - W)*(-cl->avgidle);
808                          */
809                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811                         /*
812                            That is not all.
813                            To maintain the rate allocated to the class,
814                            we add to undertime the virtual clock time
815                            needed to complete the transmitted packet
816                            (len/phys_bandwidth has already elapsed by
817                            the time cbq_update is called).
818                          */
819
820                         idle -= L2T(&q->link, len);
821                         idle += L2T(cl, len);
822
823                         cl->undertime = q->now + idle;
824                 } else {
825                         /* Underlimit */
826
827                         cl->undertime = PSCHED_PASTPERFECT;
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
849                 cl->delayed = 0;
850                 return cl;
851         }
852
853         do {
854                 /* This is a very suspicious place. Currently the overlimit
855                    action is generated for non-bounded classes
856                    only if the link is completely congested.
857                    Though this agrees with the ancestor-only paradigm,
858                    it looks very stupid. In particular,
859                    it means that this chunk of code will either
860                    never be called or result in strong amplification
861                    of burstiness. Dangerous, silly, but
862                    no other solution exists.
863                  */
864                 if ((cl = cl->borrow) == NULL) {
865                         this_cl->qstats.overlimits++;
866                         this_cl->overlimit(this_cl);
867                         return NULL;
868                 }
869                 if (cl->level > q->toplevel)
870                         return NULL;
871         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
872
873         cl->delayed = 0;
874         return cl;
875 }
876
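/* Weighted round robin over one priority band: the band's classes form a
   circular next_alive list; each class consumes its 'deficit' (refilled by
   'quantum' once exhausted) and is skipped while it is overlimit, empty,
   or penalized into another band. */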
877 static __inline__ struct sk_buff *
878 cbq_dequeue_prio(struct Qdisc *sch, int prio)
879 {
880         struct cbq_sched_data *q = qdisc_priv(sch);
881         struct cbq_class *cl_tail, *cl_prev, *cl;
882         struct sk_buff *skb;
883         int deficit;
884
885         cl_tail = cl_prev = q->active[prio];
886         cl = cl_prev->next_alive;
887
888         do {
889                 deficit = 0;
890
891                 /* Start round */
892                 do {
893                         struct cbq_class *borrow = cl;
894
895                         if (cl->q->q.qlen &&
896                             (borrow = cbq_under_limit(cl)) == NULL)
897                                 goto skip_class;
898
899                         if (cl->deficit <= 0) {
900                                 /* Class exhausted its allotment per
901                                    this round. Switch to the next one.
902                                  */
903                                 deficit = 1;
904                                 cl->deficit += cl->quantum;
905                                 goto next_class;
906                         }
907
908                         skb = cl->q->dequeue(cl->q);
909
910                         /* Class did not give us any skb :-(
911                            This can occur even if cl->q->q.qlen != 0,
912                            e.g. if cl->q == "tbf"
913                          */
914                         if (skb == NULL)
915                                 goto skip_class;
916
917                         cl->deficit -= skb->len;
918                         q->tx_class = cl;
919                         q->tx_borrowed = borrow;
920                         if (borrow != cl) {
921 #ifndef CBQ_XSTATS_BORROWS_BYTES
922                                 borrow->xstats.borrows++;
923                                 cl->xstats.borrows++;
924 #else
925                                 borrow->xstats.borrows += skb->len;
926                                 cl->xstats.borrows += skb->len;
927 #endif
928                         }
929                         q->tx_len = skb->len;
930
931                         if (cl->deficit <= 0) {
932                                 q->active[prio] = cl;
933                                 cl = cl->next_alive;
934                                 cl->deficit += cl->quantum;
935                         }
936                         return skb;
937
938 skip_class:
939                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
940                                 /* Class is empty or penalized.
941                                    Unlink it from active chain.
942                                  */
943                                 cl_prev->next_alive = cl->next_alive;
944                                 cl->next_alive = NULL;
945
946                                 /* Did cl_tail point to it? */
947                                 if (cl == cl_tail) {
948                                         /* Repair it! */
949                                         cl_tail = cl_prev;
950
951                                         /* Was it the last class in this band? */
952                                         if (cl == cl_tail) {
953                                                 /* Kill the band! */
954                                                 q->active[prio] = NULL;
955                                                 q->activemask &= ~(1<<prio);
956                                                 if (cl->q->q.qlen)
957                                                         cbq_activate_class(cl);
958                                                 return NULL;
959                                         }
960
961                                         q->active[prio] = cl_tail;
962                                 }
963                                 if (cl->q->q.qlen)
964                                         cbq_activate_class(cl);
965
966                                 cl = cl_prev;
967                         }
968
969 next_class:
970                         cl_prev = cl;
971                         cl = cl->next_alive;
972                 } while (cl_prev != cl_tail);
973         } while (deficit);
974
975         q->active[prio] = cl_prev;
976
977         return NULL;
978 }
979
980 static __inline__ struct sk_buff *
981 cbq_dequeue_1(struct Qdisc *sch)
982 {
983         struct cbq_sched_data *q = qdisc_priv(sch);
984         struct sk_buff *skb;
985         unsigned activemask;
986
987         activemask = q->activemask&0xFF;
988         while (activemask) {
989                 int prio = ffz(~activemask);
990                 activemask &= ~(1<<prio);
991                 skb = cbq_dequeue_prio(sch, prio);
992                 if (skb)
993                         return skb;
994         }
995         return NULL;
996 }
997
998 static struct sk_buff *
999 cbq_dequeue(struct Qdisc *sch)
1000 {
1001         struct sk_buff *skb;
1002         struct cbq_sched_data *q = qdisc_priv(sch);
1003         psched_time_t now;
1004         psched_tdiff_t incr;
1005
1006         PSCHED_GET_TIME(now);
1007         incr = now - q->now_rt;
1008
1009         if (q->tx_class) {
1010                 psched_tdiff_t incr2;
1011                 /* Time integrator. We calculate EOS time
1012                    by adding expected packet transmission time.
1013                    If real time is greater, we warp artificial clock,
1014                    so that:
1015
1016                    cbq_time = max(real_time, work);
1017                  */
1018                 incr2 = L2T(&q->link, q->tx_len);
1019                 q->now += incr2;
1020                 cbq_update(q);
1021                 if ((incr -= incr2) < 0)
1022                         incr = 0;
1023         }
1024         q->now += incr;
1025         q->now_rt = now;
1026
1027         for (;;) {
1028                 q->wd_expires = 0;
1029
1030                 skb = cbq_dequeue_1(sch);
1031                 if (skb) {
1032                         sch->q.qlen--;
1033                         sch->flags &= ~TCQ_F_THROTTLED;
1034                         return skb;
1035                 }
1036
1037                 /* All the classes are overlimit.
1038
1039                    This is possible if:
1040
1041                    1. Scheduler is empty.
1042                    2. Toplevel cutoff inhibited borrowing.
1043                    3. Root class is overlimit.
1044
1045                    Reset the 2nd and 3rd conditions and retry.
1046
1047                    Note that NS and cbq-2.0 are buggy: peeking at
1048                    an arbitrary class is appropriate for ancestor-only
1049                    sharing, but not for the toplevel algorithm.
1050
1051                    Our version is better but slower, because it requires
1052                    two passes; this is unavoidable with top-level sharing.
1053                 */
1054
1055                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1056                     q->link.undertime == PSCHED_PASTPERFECT)
1057                         break;
1058
1059                 q->toplevel = TC_CBQ_MAXLEVEL;
1060                 q->link.undertime = PSCHED_PASTPERFECT;
1061         }
1062
1063         /* No packets in scheduler or nobody wants to give them to us :-(
1064            Sigh... start the watchdog timer in the latter case. */
1065
1066         if (sch->q.qlen) {
1067                 sch->qstats.overlimits++;
1068                 if (q->wd_expires)
1069                         qdisc_watchdog_schedule(&q->watchdog,
1070                                                 now + q->wd_expires);
1071         }
1072         return NULL;
1073 }
1074
1075 /* CBQ class maintenance routines */
1076
1077 static void cbq_adjust_levels(struct cbq_class *this)
1078 {
1079         if (this == NULL)
1080                 return;
1081
1082         do {
1083                 int level = 0;
1084                 struct cbq_class *cl;
1085
1086                 if ((cl = this->children) != NULL) {
1087                         do {
1088                                 if (cl->level > level)
1089                                         level = cl->level;
1090                         } while ((cl = cl->sibling) != this->children);
1091                 }
1092                 this->level = level+1;
1093         } while ((this = this->tparent) != NULL);
1094 }
1095
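/* A class's WRR quantum is its allot scaled by its weight relative to the
   average weight of its priority band:
   quantum = allot * weight / (quanta[prio] / nclasses[prio]). */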
1096 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1097 {
1098         struct cbq_class *cl;
1099         unsigned h;
1100
1101         if (q->quanta[prio] == 0)
1102                 return;
1103
1104         for (h=0; h<16; h++) {
1105                 for (cl = q->classes[h]; cl; cl = cl->next) {
1106                         /* BUGGGG... Beware! This expression suffers from
1107                            arithmetic overflow!
1108                          */
1109                         if (cl->priority == prio) {
1110                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1111                                         q->quanta[prio];
1112                         }
1113                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1114                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1115                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1116                         }
1117                 }
1118         }
1119 }
1120
1121 static void cbq_sync_defmap(struct cbq_class *cl)
1122 {
1123         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1124         struct cbq_class *split = cl->split;
1125         unsigned h;
1126         int i;
1127
1128         if (split == NULL)
1129                 return;
1130
1131         for (i=0; i<=TC_PRIO_MAX; i++) {
1132                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1133                         split->defaults[i] = NULL;
1134         }
1135
1136         for (i=0; i<=TC_PRIO_MAX; i++) {
1137                 int level = split->level;
1138
1139                 if (split->defaults[i])
1140                         continue;
1141
1142                 for (h=0; h<16; h++) {
1143                         struct cbq_class *c;
1144
1145                         for (c = q->classes[h]; c; c = c->next) {
1146                                 if (c->split == split && c->level < level &&
1147                                     c->defmap&(1<<i)) {
1148                                         split->defaults[i] = c;
1149                                         level = c->level;
1150                                 }
1151                         }
1152                 }
1153         }
1154 }
1155
1156 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1157 {
1158         struct cbq_class *split = NULL;
1159
1160         if (splitid == 0) {
1161                 if ((split = cl->split) == NULL)
1162                         return;
1163                 splitid = split->classid;
1164         }
1165
1166         if (split == NULL || split->classid != splitid) {
1167                 for (split = cl->tparent; split; split = split->tparent)
1168                         if (split->classid == splitid)
1169                                 break;
1170         }
1171
1172         if (split == NULL)
1173                 return;
1174
1175         if (cl->split != split) {
1176                 cl->defmap = 0;
1177                 cbq_sync_defmap(cl);
1178                 cl->split = split;
1179                 cl->defmap = def&mask;
1180         } else
1181                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1182
1183         cbq_sync_defmap(cl);
1184 }
1185
1186 static void cbq_unlink_class(struct cbq_class *this)
1187 {
1188         struct cbq_class *cl, **clp;
1189         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1190
1191         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1192                 if (cl == this) {
1193                         *clp = cl->next;
1194                         cl->next = NULL;
1195                         break;
1196                 }
1197         }
1198
1199         if (this->tparent) {
1200                 clp=&this->sibling;
1201                 cl = *clp;
1202                 do {
1203                         if (cl == this) {
1204                                 *clp = cl->sibling;
1205                                 break;
1206                         }
1207                         clp = &cl->sibling;
1208                 } while ((cl = *clp) != this->sibling);
1209
1210                 if (this->tparent->children == this) {
1211                         this->tparent->children = this->sibling;
1212                         if (this->sibling == this)
1213                                 this->tparent->children = NULL;
1214                 }
1215         } else {
1216                 BUG_TRAP(this->sibling == this);
1217         }
1218 }
1219
1220 static void cbq_link_class(struct cbq_class *this)
1221 {
1222         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1223         unsigned h = cbq_hash(this->classid);
1224         struct cbq_class *parent = this->tparent;
1225
1226         this->sibling = this;
1227         this->next = q->classes[h];
1228         q->classes[h] = this;
1229
1230         if (parent == NULL)
1231                 return;
1232
1233         if (parent->children == NULL) {
1234                 parent->children = this;
1235         } else {
1236                 this->sibling = parent->children->sibling;
1237                 parent->children->sibling = this;
1238         }
1239 }
1240
1241 static unsigned int cbq_drop(struct Qdisc* sch)
1242 {
1243         struct cbq_sched_data *q = qdisc_priv(sch);
1244         struct cbq_class *cl, *cl_head;
1245         int prio;
1246         unsigned int len;
1247
1248         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1249                 if ((cl_head = q->active[prio]) == NULL)
1250                         continue;
1251
1252                 cl = cl_head;
1253                 do {
1254                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1255                                 sch->q.qlen--;
1256                                 if (!cl->q->q.qlen)
1257                                         cbq_deactivate_class(cl);
1258                                 return len;
1259                         }
1260                 } while ((cl = cl->next_alive) != cl_head);
1261         }
1262         return 0;
1263 }
1264
1265 static void
1266 cbq_reset(struct Qdisc* sch)
1267 {
1268         struct cbq_sched_data *q = qdisc_priv(sch);
1269         struct cbq_class *cl;
1270         int prio;
1271         unsigned h;
1272
1273         q->activemask = 0;
1274         q->pmask = 0;
1275         q->tx_class = NULL;
1276         q->tx_borrowed = NULL;
1277         qdisc_watchdog_cancel(&q->watchdog);
1278         hrtimer_cancel(&q->delay_timer);
1279         q->toplevel = TC_CBQ_MAXLEVEL;
1280         PSCHED_GET_TIME(q->now);
1281         q->now_rt = q->now;
1282
1283         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1284                 q->active[prio] = NULL;
1285
1286         for (h = 0; h < 16; h++) {
1287                 for (cl = q->classes[h]; cl; cl = cl->next) {
1288                         qdisc_reset(cl->q);
1289
1290                         cl->next_alive = NULL;
1291                         cl->undertime = PSCHED_PASTPERFECT;
1292                         cl->avgidle = cl->maxidle;
1293                         cl->deficit = cl->quantum;
1294                         cl->cpriority = cl->priority;
1295                 }
1296         }
1297         sch->q.qlen = 0;
1298 }
1299
1300
1301 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1302 {
1303         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1304                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1305                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1306         }
1307         if (lss->change&TCF_CBQ_LSS_EWMA)
1308                 cl->ewma_log = lss->ewma_log;
1309         if (lss->change&TCF_CBQ_LSS_AVPKT)
1310                 cl->avpkt = lss->avpkt;
1311         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1312                 cl->minidle = -(long)lss->minidle;
1313         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1314                 cl->maxidle = lss->maxidle;
1315                 cl->avgidle = lss->maxidle;
1316         }
1317         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1318                 cl->offtime = lss->offtime;
1319         return 0;
1320 }
1321
1322 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1323 {
1324         q->nclasses[cl->priority]--;
1325         q->quanta[cl->priority] -= cl->weight;
1326         cbq_normalize_quanta(q, cl->priority);
1327 }
1328
1329 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1330 {
1331         q->nclasses[cl->priority]++;
1332         q->quanta[cl->priority] += cl->weight;
1333         cbq_normalize_quanta(q, cl->priority);
1334 }
1335
1336 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1337 {
1338         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1339
1340         if (wrr->allot)
1341                 cl->allot = wrr->allot;
1342         if (wrr->weight)
1343                 cl->weight = wrr->weight;
1344         if (wrr->priority) {
1345                 cl->priority = wrr->priority-1;
1346                 cl->cpriority = cl->priority;
1347                 if (cl->priority >= cl->priority2)
1348                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1349         }
1350
1351         cbq_addprio(q, cl);
1352         return 0;
1353 }
1354
1355 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1356 {
1357         switch (ovl->strategy) {
1358         case TC_CBQ_OVL_CLASSIC:
1359                 cl->overlimit = cbq_ovl_classic;
1360                 break;
1361         case TC_CBQ_OVL_DELAY:
1362                 cl->overlimit = cbq_ovl_delay;
1363                 break;
1364         case TC_CBQ_OVL_LOWPRIO:
1365                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1366                     ovl->priority2-1 <= cl->priority)
1367                         return -EINVAL;
1368                 cl->priority2 = ovl->priority2-1;
1369                 cl->overlimit = cbq_ovl_lowprio;
1370                 break;
1371         case TC_CBQ_OVL_DROP:
1372                 cl->overlimit = cbq_ovl_drop;
1373                 break;
1374         case TC_CBQ_OVL_RCLASSIC:
1375                 cl->overlimit = cbq_ovl_rclassic;
1376                 break;
1377         default:
1378                 return -EINVAL;
1379         }
1380         cl->penalty = ovl->penalty;
1381         return 0;
1382 }
1383
1384 #ifdef CONFIG_NET_CLS_POLICE
1385 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1386 {
1387         cl->police = p->police;
1388
1389         if (cl->q->handle) {
1390                 if (p->police == TC_POLICE_RECLASSIFY)
1391                         cl->q->reshape_fail = cbq_reshape_fail;
1392                 else
1393                         cl->q->reshape_fail = NULL;
1394         }
1395         return 0;
1396 }
1397 #endif
1398
1399 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1400 {
1401         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1402         return 0;
1403 }
1404
1405 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1406 {
1407         struct cbq_sched_data *q = qdisc_priv(sch);
1408         struct rtattr *tb[TCA_CBQ_MAX];
1409         struct tc_ratespec *r;
1410
1411         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1412             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1413             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1414                 return -EINVAL;
1415
1416         if (tb[TCA_CBQ_LSSOPT-1] &&
1417             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1418                 return -EINVAL;
1419
1420         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1421
1422         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1423                 return -EINVAL;
1424
1425         q->link.refcnt = 1;
1426         q->link.sibling = &q->link;
1427         q->link.classid = sch->handle;
1428         q->link.qdisc = sch;
1429         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1430                                             sch->handle)))
1431                 q->link.q = &noop_qdisc;
1432
1433         q->link.priority = TC_CBQ_MAXPRIO-1;
1434         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1435         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1436         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1437         q->link.overlimit = cbq_ovl_classic;
1438         q->link.allot = psched_mtu(sch->dev);
1439         q->link.quantum = q->link.allot;
1440         q->link.weight = q->link.R_tab->rate.rate;
1441
1442         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1443         q->link.avpkt = q->link.allot/2;
1444         q->link.minidle = -0x7FFFFFFF;
1445         q->link.stats_lock = &sch->dev->queue_lock;
1446
1447         qdisc_watchdog_init(&q->watchdog, sch);
1448         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1449         q->delay_timer.function = cbq_undelay;
1450         q->toplevel = TC_CBQ_MAXLEVEL;
1451         PSCHED_GET_TIME(q->now);
1452         q->now_rt = q->now;
1453
1454         cbq_link_class(&q->link);
1455
1456         if (tb[TCA_CBQ_LSSOPT-1])
1457                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1458
1459         cbq_addprio(q, &q->link);
1460         return 0;
1461 }
1462
1463 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1464 {
1465         unsigned char *b = skb_tail_pointer(skb);
1466
1467         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1468         return skb->len;
1469
1470 rtattr_failure:
1471         nlmsg_trim(skb, b);
1472         return -1;
1473 }
1474
1475 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1476 {
1477         unsigned char *b = skb_tail_pointer(skb);
1478         struct tc_cbq_lssopt opt;
1479
1480         opt.flags = 0;
1481         if (cl->borrow == NULL)
1482                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1483         if (cl->share == NULL)
1484                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1485         opt.ewma_log = cl->ewma_log;
1486         opt.level = cl->level;
1487         opt.avpkt = cl->avpkt;
1488         opt.maxidle = cl->maxidle;
1489         opt.minidle = (u32)(-cl->minidle);
1490         opt.offtime = cl->offtime;
1491         opt.change = ~0;
1492         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1493         return skb->len;
1494
1495 rtattr_failure:
1496         nlmsg_trim(skb, b);
1497         return -1;
1498 }
1499
1500 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1501 {
1502         unsigned char *b = skb_tail_pointer(skb);
1503         struct tc_cbq_wrropt opt;
1504
1505         opt.flags = 0;
1506         opt.allot = cl->allot;
1507         opt.priority = cl->priority+1;
1508         opt.cpriority = cl->cpriority+1;
1509         opt.weight = cl->weight;
1510         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1511         return skb->len;
1512
1513 rtattr_failure:
1514         nlmsg_trim(skb, b);
1515         return -1;
1516 }
1517
1518 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1519 {
1520         unsigned char *b = skb_tail_pointer(skb);
1521         struct tc_cbq_ovl opt;
1522
1523         opt.strategy = cl->ovl_strategy;
1524         opt.priority2 = cl->priority2+1;
1525         opt.pad = 0;
1526         opt.penalty = cl->penalty;
1527         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1528         return skb->len;
1529
1530 rtattr_failure:
1531         nlmsg_trim(skb, b);
1532         return -1;
1533 }
1534
1535 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1536 {
1537         unsigned char *b = skb_tail_pointer(skb);
1538         struct tc_cbq_fopt opt;
1539
1540         if (cl->split || cl->defmap) {
1541                 opt.split = cl->split ? cl->split->classid : 0;
1542                 opt.defmap = cl->defmap;
1543                 opt.defchange = ~0;
1544                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1545         }
1546         return skb->len;
1547
1548 rtattr_failure:
1549         nlmsg_trim(skb, b);
1550         return -1;
1551 }
1552
1553 #ifdef CONFIG_NET_CLS_POLICE
1554 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1555 {
1556         unsigned char *b = skb_tail_pointer(skb);
1557         struct tc_cbq_police opt;
1558
1559         if (cl->police) {
1560                 opt.police = cl->police;
1561                 opt.__res1 = 0;
1562                 opt.__res2 = 0;
1563                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1564         }
1565         return skb->len;
1566
1567 rtattr_failure:
1568         nlmsg_trim(skb, b);
1569         return -1;
1570 }
1571 #endif
1572
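     /* Emit the full set of per-class attributes; -1 if any helper failed. */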
1573 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1574 {
1575         if (cbq_dump_lss(skb, cl) < 0 ||
1576             cbq_dump_rate(skb, cl) < 0 ||
1577             cbq_dump_wrr(skb, cl) < 0 ||
1578             cbq_dump_ovl(skb, cl) < 0 ||
1579 #ifdef CONFIG_NET_CLS_POLICE
1580             cbq_dump_police(skb, cl) < 0 ||
1581 #endif
1582             cbq_dump_fopt(skb, cl) < 0)
1583                 return -1;
1584         return 0;
1585 }
1586
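     /*
      * Dump the qdisc itself ("tc qdisc show"): the root link class
      * parameters are nested inside a single TCA_OPTIONS attribute.
      */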
1587 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1588 {
1589         struct cbq_sched_data *q = qdisc_priv(sch);
1590         unsigned char *b = skb_tail_pointer(skb);
1591         struct rtattr *rta;
1592
1593         rta = (struct rtattr*)b;
1594         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1595         if (cbq_dump_attr(skb, &q->link) < 0)
1596                 goto rtattr_failure;
1597         rta->rta_len = skb_tail_pointer(skb) - b;
1598         return skb->len;
1599
1600 rtattr_failure:
1601         nlmsg_trim(skb, b);
1602         return -1;
1603 }
1604
1605 static int
1606 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1607 {
1608         struct cbq_sched_data *q = qdisc_priv(sch);
1609
1610         q->link.xstats.avgidle = q->link.avgidle;
1611         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1612 }
1613
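     /*
      * Dump one class ("tc class show"): fill in the tcmsg header (parent,
      * handle, leaf qdisc) and nest the class parameters under TCA_OPTIONS.
      */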
1614 static int
1615 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1616                struct sk_buff *skb, struct tcmsg *tcm)
1617 {
1618         struct cbq_class *cl = (struct cbq_class*)arg;
1619         unsigned char *b = skb_tail_pointer(skb);
1620         struct rtattr *rta;
1621
1622         if (cl->tparent)
1623                 tcm->tcm_parent = cl->tparent->classid;
1624         else
1625                 tcm->tcm_parent = TC_H_ROOT;
1626         tcm->tcm_handle = cl->classid;
1627         tcm->tcm_info = cl->q->handle;
1628
1629         rta = (struct rtattr*)b;
1630         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1631         if (cbq_dump_attr(skb, cl) < 0)
1632                 goto rtattr_failure;
1633         rta->rta_len = skb_tail_pointer(skb) - b;
1634         return skb->len;
1635
1636 rtattr_failure:
1637         nlmsg_trim(skb, b);
1638         return -1;
1639 }
1640
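     /*
      * Per-class statistics: refresh the queue length and the
      * avgidle/undertime snapshot, then export basic, rate-estimator and
      * queue stats followed by the CBQ-specific xstats block.
      */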
1641 static int
1642 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1643         struct gnet_dump *d)
1644 {
1645         struct cbq_sched_data *q = qdisc_priv(sch);
1646         struct cbq_class *cl = (struct cbq_class*)arg;
1647
1648         cl->qstats.qlen = cl->q->q.qlen;
1649         cl->xstats.avgidle = cl->avgidle;
1650         cl->xstats.undertime = 0;
1651
1652         if (cl->undertime != PSCHED_PASTPERFECT)
1653                 cl->xstats.undertime = cl->undertime - q->now;
1654
1655         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1656 #ifdef CONFIG_NET_ESTIMATOR
1657             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1658 #endif
1659             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1660                 return -1;
1661
1662         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1663 }
1664
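     /*
      * Replace the leaf qdisc of a class.  A NULL replacement means
      * "attach a fresh default pfifo"; the old qdisc has its backlog
      * removed from the ancestors' qlen counters and is reset before
      * being handed back to the caller.
      */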
1665 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1666                      struct Qdisc **old)
1667 {
1668         struct cbq_class *cl = (struct cbq_class*)arg;
1669
1670         if (cl) {
1671                 if (new == NULL) {
1672                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1673                                                      cl->classid)) == NULL)
1674                                 return -ENOBUFS;
1675                 } else {
1676 #ifdef CONFIG_NET_CLS_POLICE
1677                         if (cl->police == TC_POLICE_RECLASSIFY)
1678                                 new->reshape_fail = cbq_reshape_fail;
1679 #endif
1680                 }
1681                 sch_tree_lock(sch);
1682                 *old = xchg(&cl->q, new);
1683                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1684                 qdisc_reset(*old);
1685                 sch_tree_unlock(sch);
1686
1687                 return 0;
1688         }
1689         return -ENOENT;
1690 }
1691
1692 static struct Qdisc *
1693 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1694 {
1695         struct cbq_class *cl = (struct cbq_class*)arg;
1696
1697         return cl ? cl->q : NULL;
1698 }
1699
1700 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1701 {
1702         struct cbq_class *cl = (struct cbq_class *)arg;
1703
1704         if (cl->q->q.qlen == 0)
1705                 cbq_deactivate_class(cl);
1706 }
1707
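     /*
      * Class reference counting: cbq_get() looks the class up by classid
      * and takes a reference, cbq_put() drops it and destroys the class
      * once the last reference is gone.
      */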
1708 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1709 {
1710         struct cbq_sched_data *q = qdisc_priv(sch);
1711         struct cbq_class *cl = cbq_class_lookup(q, classid);
1712
1713         if (cl) {
1714                 cl->refcnt++;
1715                 return (unsigned long)cl;
1716         }
1717         return 0;
1718 }
1719
1720 static void cbq_destroy_filters(struct cbq_class *cl)
1721 {
1722         struct tcf_proto *tp;
1723
1724         while ((tp = cl->filter_list) != NULL) {
1725                 cl->filter_list = tp->next;
1726                 tcf_destroy(tp);
1727         }
1728 }
1729
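     /*
      * Release everything a class owns: filters, leaf qdisc, rate table
      * and rate estimator.  The embedded root class (q->link) is not
      * kfree()d.
      */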
1730 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1731 {
1732         struct cbq_sched_data *q = qdisc_priv(sch);
1733
1734         BUG_TRAP(!cl->filters);
1735
1736         cbq_destroy_filters(cl);
1737         qdisc_destroy(cl->q);
1738         qdisc_put_rtab(cl->R_tab);
1739 #ifdef CONFIG_NET_ESTIMATOR
1740         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1741 #endif
1742         if (cl != &q->link)
1743                 kfree(cl);
1744 }
1745
1746 static void
1747 cbq_destroy(struct Qdisc* sch)
1748 {
1749         struct cbq_sched_data *q = qdisc_priv(sch);
1750         struct cbq_class *cl;
1751         unsigned h;
1752
1753 #ifdef CONFIG_NET_CLS_POLICE
1754         q->rx_class = NULL;
1755 #endif
1756         /*
1757          * Filters must be destroyed first because we don't destroy the
1758          * classes from root to leaves, which means that filters can still
1759          * be bound to classes which have been destroyed already. --TGR '04
1760          */
1761         for (h = 0; h < 16; h++)
1762                 for (cl = q->classes[h]; cl; cl = cl->next)
1763                         cbq_destroy_filters(cl);
1764
1765         for (h = 0; h < 16; h++) {
1766                 struct cbq_class *next;
1767
1768                 for (cl = q->classes[h]; cl; cl = next) {
1769                         next = cl->next;
1770                         cbq_destroy_class(sch, cl);
1771                 }
1772         }
1773 }
1774
1775 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1776 {
1777         struct cbq_class *cl = (struct cbq_class*)arg;
1778
1779         if (--cl->refcnt == 0) {
1780 #ifdef CONFIG_NET_CLS_POLICE
1781                 struct cbq_sched_data *q = qdisc_priv(sch);
1782
1783                 spin_lock_bh(&sch->dev->queue_lock);
1784                 if (q->rx_class == cl)
1785                         q->rx_class = NULL;
1786                 spin_unlock_bh(&sch->dev->queue_lock);
1787 #endif
1788
1789                 cbq_destroy_class(sch, cl);
1790         }
1791 }
1792
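     /*
      * Create a new class or change an existing one.  This backs commands
      * such as (illustrative):
      *
      *   tc class add dev eth0 parent 1: classid 1:10 cbq \
      *      rate 1mbit allot 1514 prio 5 avpkt 1000 bounded
      *
      * For an existing class (*arg != 0) only the supplied attributes are
      * changed; for a new class the rate, WRR and LSS options are mandatory.
      */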
1793 static int
1794 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1795                  unsigned long *arg)
1796 {
1797         int err;
1798         struct cbq_sched_data *q = qdisc_priv(sch);
1799         struct cbq_class *cl = (struct cbq_class*)*arg;
1800         struct rtattr *opt = tca[TCA_OPTIONS-1];
1801         struct rtattr *tb[TCA_CBQ_MAX];
1802         struct cbq_class *parent;
1803         struct qdisc_rate_table *rtab = NULL;
1804
1805         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1806                 return -EINVAL;
1807
1808         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1809             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1810                 return -EINVAL;
1811
1812         if (tb[TCA_CBQ_FOPT-1] &&
1813             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1814                 return -EINVAL;
1815
1816         if (tb[TCA_CBQ_RATE-1] &&
1817             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1818                 return -EINVAL;
1819
1820         if (tb[TCA_CBQ_LSSOPT-1] &&
1821             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1822                 return -EINVAL;
1823
1824         if (tb[TCA_CBQ_WRROPT-1] &&
1825             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1826                 return -EINVAL;
1827
1828 #ifdef CONFIG_NET_CLS_POLICE
1829         if (tb[TCA_CBQ_POLICE-1] &&
1830             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1831                 return -EINVAL;
1832 #endif
1833
1834         if (cl) {
1835                 /* Check parent */
1836                 if (parentid) {
1837                         if (cl->tparent && cl->tparent->classid != parentid)
1838                                 return -EINVAL;
1839                         if (!cl->tparent && parentid != TC_H_ROOT)
1840                                 return -EINVAL;
1841                 }
1842
1843                 if (tb[TCA_CBQ_RATE-1]) {
1844                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1845                         if (rtab == NULL)
1846                                 return -EINVAL;
1847                 }
1848
1849                 /* Change class parameters */
1850                 sch_tree_lock(sch);
1851
1852                 if (cl->next_alive != NULL)
1853                         cbq_deactivate_class(cl);
1854
1855                 if (rtab) {
1856                         rtab = xchg(&cl->R_tab, rtab);
1857                         qdisc_put_rtab(rtab);
1858                 }
1859
1860                 if (tb[TCA_CBQ_LSSOPT-1])
1861                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1862
1863                 if (tb[TCA_CBQ_WRROPT-1]) {
1864                         cbq_rmprio(q, cl);
1865                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1866                 }
1867
1868                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1869                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1870
1871 #ifdef CONFIG_NET_CLS_POLICE
1872                 if (tb[TCA_CBQ_POLICE-1])
1873                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1874 #endif
1875
1876                 if (tb[TCA_CBQ_FOPT-1])
1877                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1878
1879                 if (cl->q->q.qlen)
1880                         cbq_activate_class(cl);
1881
1882                 sch_tree_unlock(sch);
1883
1884 #ifdef CONFIG_NET_ESTIMATOR
1885                 if (tca[TCA_RATE-1])
1886                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1887                                 cl->stats_lock, tca[TCA_RATE-1]);
1888 #endif
1889                 return 0;
1890         }
1891
1892         if (parentid == TC_H_ROOT)
1893                 return -EINVAL;
1894
1895         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1896             tb[TCA_CBQ_LSSOPT-1] == NULL)
1897                 return -EINVAL;
1898
1899         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1900         if (rtab == NULL)
1901                 return -EINVAL;
1902
1903         if (classid) {
1904                 err = -EINVAL;
1905                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1906                         goto failure;
1907         } else {
1908                 int i;
1909                 classid = TC_H_MAKE(sch->handle,0x8000);
1910
1911                 for (i=0; i<0x8000; i++) {
1912                         if (++q->hgenerator >= 0x8000)
1913                                 q->hgenerator = 1;
1914                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1915                                 break;
1916                 }
1917                 err = -ENOSR;
1918                 if (i >= 0x8000)
1919                         goto failure;
1920                 classid = classid|q->hgenerator;
1921         }
1922
1923         parent = &q->link;
1924         if (parentid) {
1925                 parent = cbq_class_lookup(q, parentid);
1926                 err = -EINVAL;
1927                 if (parent == NULL)
1928                         goto failure;
1929         }
1930
1931         err = -ENOBUFS;
1932         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1933         if (cl == NULL)
1934                 goto failure;
1935         cl->R_tab = rtab;
1936         rtab = NULL;
1937         cl->refcnt = 1;
1938         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1939                 cl->q = &noop_qdisc;
1940         cl->classid = classid;
1941         cl->tparent = parent;
1942         cl->qdisc = sch;
1943         cl->allot = parent->allot;
1944         cl->quantum = cl->allot;
1945         cl->weight = cl->R_tab->rate.rate;
1946         cl->stats_lock = &sch->dev->queue_lock;
1947
1948         sch_tree_lock(sch);
1949         cbq_link_class(cl);
1950         cl->borrow = cl->tparent;
1951         if (cl->tparent != &q->link)
1952                 cl->share = cl->tparent;
1953         cbq_adjust_levels(parent);
1954         cl->minidle = -0x7FFFFFFF;
1955         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1956         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1957         if (cl->ewma_log==0)
1958                 cl->ewma_log = q->link.ewma_log;
1959         if (cl->maxidle==0)
1960                 cl->maxidle = q->link.maxidle;
1961         if (cl->avpkt==0)
1962                 cl->avpkt = q->link.avpkt;
1963         cl->overlimit = cbq_ovl_classic;
1964         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1965                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1966 #ifdef CONFIG_NET_CLS_POLICE
1967         if (tb[TCA_CBQ_POLICE-1])
1968                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1969 #endif
1970         if (tb[TCA_CBQ_FOPT-1])
1971                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1972         sch_tree_unlock(sch);
1973
1974 #ifdef CONFIG_NET_ESTIMATOR
1975         if (tca[TCA_RATE-1])
1976                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1977                         cl->stats_lock, tca[TCA_RATE-1]);
1978 #endif
1979
1980         *arg = (unsigned long)cl;
1981         return 0;
1982
1983 failure:
1984         qdisc_put_rtab(rtab);
1985         return err;
1986 }
1987
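     /*
      * "tc class del": a class can only be removed when no filters
      * reference it and it has no children; the root link class is never
      * deletable.  The queue is purged, the class unlinked from the hash
      * and the WRR rings, and the final reference drop frees it.
      */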
1988 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1989 {
1990         struct cbq_sched_data *q = qdisc_priv(sch);
1991         struct cbq_class *cl = (struct cbq_class*)arg;
1992         unsigned int qlen;
1993
1994         if (cl->filters || cl->children || cl == &q->link)
1995                 return -EBUSY;
1996
1997         sch_tree_lock(sch);
1998
1999         qlen = cl->q->q.qlen;
2000         qdisc_reset(cl->q);
2001         qdisc_tree_decrease_qlen(cl->q, qlen);
2002
2003         if (cl->next_alive)
2004                 cbq_deactivate_class(cl);
2005
2006         if (q->tx_borrowed == cl)
2007                 q->tx_borrowed = q->tx_class;
2008         if (q->tx_class == cl) {
2009                 q->tx_class = NULL;
2010                 q->tx_borrowed = NULL;
2011         }
2012 #ifdef CONFIG_NET_CLS_POLICE
2013         if (q->rx_class == cl)
2014                 q->rx_class = NULL;
2015 #endif
2016
2017         cbq_unlink_class(cl);
2018         cbq_adjust_levels(cl->tparent);
2019         cl->defmap = 0;
2020         cbq_sync_defmap(cl);
2021
2022         cbq_rmprio(q, cl);
2023         sch_tree_unlock(sch);
2024
2025         if (--cl->refcnt == 0)
2026                 cbq_destroy_class(sch, cl);
2027
2028         return 0;
2029 }
2030
2031 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2032 {
2033         struct cbq_sched_data *q = qdisc_priv(sch);
2034         struct cbq_class *cl = (struct cbq_class *)arg;
2035
2036         if (cl == NULL)
2037                 cl = &q->link;
2038
2039         return &cl->filter_list;
2040 }
2041
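     /*
      * Filter binding: a filter may only point to a class that sits
      * strictly below its parent class in the tree; bumping cl->filters
      * keeps the class from being deleted while filters still reference it.
      */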
2042 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2043                                      u32 classid)
2044 {
2045         struct cbq_sched_data *q = qdisc_priv(sch);
2046         struct cbq_class *p = (struct cbq_class*)parent;
2047         struct cbq_class *cl = cbq_class_lookup(q, classid);
2048
2049         if (cl) {
2050                 if (p && p->level <= cl->level)
2051                         return 0;
2052                 cl->filters++;
2053                 return (unsigned long)cl;
2054         }
2055         return 0;
2056 }
2057
2058 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2059 {
2060         struct cbq_class *cl = (struct cbq_class*)arg;
2061
2062         cl->filters--;
2063 }
2064
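     /*
      * Iterate over every class in the 16-bucket hash, honouring the
      * walker's skip/count/stop protocol.
      */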
2065 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2066 {
2067         struct cbq_sched_data *q = qdisc_priv(sch);
2068         unsigned h;
2069
2070         if (arg->stop)
2071                 return;
2072
2073         for (h = 0; h < 16; h++) {
2074                 struct cbq_class *cl;
2075
2076                 for (cl = q->classes[h]; cl; cl = cl->next) {
2077                         if (arg->count < arg->skip) {
2078                                 arg->count++;
2079                                 continue;
2080                         }
2081                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2082                                 arg->stop = 1;
2083                                 return;
2084                         }
2085                         arg->count++;
2086                 }
2087         }
2088 }
2089
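     /* Class and qdisc operations exported to the packet scheduler core. */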
2090 static struct Qdisc_class_ops cbq_class_ops = {
2091         .graft          =       cbq_graft,
2092         .leaf           =       cbq_leaf,
2093         .qlen_notify    =       cbq_qlen_notify,
2094         .get            =       cbq_get,
2095         .put            =       cbq_put,
2096         .change         =       cbq_change_class,
2097         .delete         =       cbq_delete,
2098         .walk           =       cbq_walk,
2099         .tcf_chain      =       cbq_find_tcf,
2100         .bind_tcf       =       cbq_bind_filter,
2101         .unbind_tcf     =       cbq_unbind_filter,
2102         .dump           =       cbq_dump_class,
2103         .dump_stats     =       cbq_dump_class_stats,
2104 };
2105
2106 static struct Qdisc_ops cbq_qdisc_ops = {
2107         .next           =       NULL,
2108         .cl_ops         =       &cbq_class_ops,
2109         .id             =       "cbq",
2110         .priv_size      =       sizeof(struct cbq_sched_data),
2111         .enqueue        =       cbq_enqueue,
2112         .dequeue        =       cbq_dequeue,
2113         .requeue        =       cbq_requeue,
2114         .drop           =       cbq_drop,
2115         .init           =       cbq_init,
2116         .reset          =       cbq_reset,
2117         .destroy        =       cbq_destroy,
2118         .change         =       NULL,
2119         .dump           =       cbq_dump,
2120         .dump_stats     =       cbq_dump_stats,
2121         .owner          =       THIS_MODULE,
2122 };
2123
2124 static int __init cbq_module_init(void)
2125 {
2126         return register_qdisc(&cbq_qdisc_ops);
2127 }
2128 static void __exit cbq_module_exit(void)
2129 {
2130         unregister_qdisc(&cbq_qdisc_ops);
2131 }
2132 module_init(cbq_module_init)
2133 module_exit(cbq_module_exit)
2134 MODULE_LICENSE("GPL");